Inefficient but working forward-forward implementation

main
Shad Amethyst 2 years ago
parent 6d45eafbe7
commit a5237a8ef1

@@ -0,0 +1,137 @@
#![feature(generic_arg_infer)]

use nalgebra::{dvector, DVector};
#[allow(unused_imports)]
use neuramethyst::derivable::activation::{LeakyRelu, Linear, Relu, Tanh};
use neuramethyst::derivable::regularize::NeuraL1;
use neuramethyst::gradient_solver::NeuraForwardForward;
use neuramethyst::prelude::*;
use rand::Rng;

fn main() {
    let mut network = neura_sequential![
        neura_layer!("dense", 10).regularization(NeuraL1(0.001)),
        neura_layer!("dropout", 0.25),
        neura_layer!("normalize"),
        neura_layer!("dense", 6).regularization(NeuraL1(0.001)),
    ]
    .construct(NeuraShape::Vector(3))
    .unwrap();

    let inputs = (0..1).cycle().map(move |_| {
        let mut rng = rand::thread_rng();
        let category = rng.gen_bool(0.5);
        let good = rng.gen_bool(0.5);
        let (x, y) = if category {
            let radius: f32 = rng.gen_range(0.0..2.0);
            let angle = rng.gen_range(0.0..std::f32::consts::TAU);
            (angle.cos() * radius, angle.sin() * radius)
        } else {
            let radius: f32 = rng.gen_range(3.0..5.0);
            let angle = rng.gen_range(0.0..std::f32::consts::TAU);
            (angle.cos() * radius, angle.sin() * radius)
        };

        if good {
            (dvector![x, y, category as u8 as f32], true)
        } else {
            (dvector![x, y, 1.0 - category as u8 as f32], false)
        }
    });

    let test_inputs: Vec<_> = inputs.clone().filter(|(_, good)| *good).take(10).collect();
    let threshold = 0.25f32;

    if std::env::args().any(|arg| arg == "draw") {
        for epoch in 0..200 {
            let mut trainer = NeuraBatchedTrainer::new(0.03, 10);
            trainer.batch_size = 10;

            trainer.train(
                &NeuraForwardForward::new(Tanh, threshold as f64),
                &mut network,
                inputs.clone(),
                &test_inputs,
            );

            // let network = network.clone().trim_tail().trim_tail();
            draw_neuron_activation(
                |input| {
                    let cat0 = network.eval(&dvector![input[0] as f32, input[1] as f32, 0.0]);
                    let cat1 = network.eval(&dvector![input[0] as f32, input[1] as f32, 1.0]);

                    let cat0_good = cat0.map(|x| x * x).sum();
                    let cat1_good = cat1.map(|x| x * x).sum();
                    let estimation = cat1_good / (cat0_good + cat1_good);

                    let cat0_norm = cat0 / cat0_good.sqrt();
                    let mut cat0_rgb = DVector::from_element(3, 0.0);

                    for i in 0..cat0_norm.len() {
                        cat0_rgb[i % 3] += cat0_norm[i].abs();
                    }

                    (cat0_rgb * estimation)
                        .into_iter()
                        .map(|x| *x as f64)
                        .collect()
                },
                6.0,
            );
            println!("{}", epoch);
            std::thread::sleep(std::time::Duration::new(0, 50_000_000));
        }
    } else {
        let mut trainer = NeuraBatchedTrainer::new(0.03, 20 * 50);
        trainer.batch_size = 10;
        trainer.log_iterations = 20;

        trainer.train(
            &NeuraForwardForward::new(Tanh, threshold as f64),
            &mut network,
            inputs.clone(),
            &test_inputs,
        );

        // println!("{}", String::from("\n").repeat(64));
        // draw_neuron_activation(|input| network.eval(&input).into_iter().collect(), 6.0);
    }
}

// TODO: move this to the library?
fn draw_neuron_activation<F: Fn([f64; 2]) -> Vec<f64>>(callback: F, scale: f64) {
    use viuer::Config;
    const WIDTH: u32 = 64;
    const HEIGHT: u32 = 64;

    let mut image = image::RgbImage::new(WIDTH, HEIGHT);

    fn sigmoid(x: f64) -> f64 {
        0.1 + 0.9 * x.abs().powf(0.8)
    }

    for y in 0..HEIGHT {
        let y2 = 2.0 * y as f64 / HEIGHT as f64 - 1.0;
        for x in 0..WIDTH {
            let x2 = 2.0 * x as f64 / WIDTH as f64 - 1.0;
            let activation = callback([x2 * scale, y2 * scale]);
            let r = (sigmoid(activation.get(0).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
            let g = (sigmoid(activation.get(1).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
            let b = (sigmoid(activation.get(2).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;

            *image.get_pixel_mut(x, y) = image::Rgb([r, g, b]);
        }
    }

    let config = Config {
        use_kitty: false,
        truecolor: true,
        // absolute_offset: false,
        ..Default::default()
    };

    viuer::print(&image::DynamicImage::ImageRgb8(image), &config).unwrap();
}

@@ -47,20 +47,24 @@ fn main() {
             trainer.batch_size = 10;
             trainer.train(
-                NeuraBackprop::new(CrossEntropy),
+                &NeuraBackprop::new(CrossEntropy),
                 &mut network,
                 inputs.clone(),
                 &test_inputs,
             );
-            let network = network.clone();
+            let network_trimmed = network.clone().trim_tail().trim_tail();
             draw_neuron_activation(
                 |input| {
-                    network
-                        .eval(&dvector![input[0] as f32, input[1] as f32])
+                    let output = network.eval(&dvector![input[0] as f32, input[1] as f32]);
+                    let estimation = output[0] / (output[0] + output[1]);
+                    let color = network_trimmed.eval(&dvector![input[0] as f32, input[1] as f32]);
+                    (&color / color.map(|x| x * x).sum() * estimation)
                         .into_iter()
-                        .map(|x| *x as f64)
-                        .collect()
+                        .map(|x| x.abs() as f64)
+                        .collect::<Vec<_>>()
                 },
                 6.0,
             );
@@ -74,7 +78,7 @@ fn main() {
         trainer.log_iterations = 20;
         trainer.train(
-            NeuraBackprop::new(CrossEntropy),
+            &NeuraBackprop::new(CrossEntropy),
             &mut network,
             inputs.clone(),
             &test_inputs,
@@ -101,7 +105,7 @@ fn draw_neuron_activation<F: Fn([f64; 2]) -> Vec<f64>>(callback: F, scale: f64)
     let mut image = image::RgbImage::new(WIDTH, HEIGHT);
     fn sigmoid(x: f64) -> f64 {
-        1.0 / (1.0 + (-x * 3.0).exp())
+        1.9 / (1.0 + (-x * 3.0).exp()) - 0.9
     }
     for y in 0..HEIGHT {
@@ -119,6 +123,7 @@ fn draw_neuron_activation<F: Fn([f64; 2]) -> Vec<f64>>(callback: F, scale: f64)
     let config = Config {
         use_kitty: false,
+        truecolor: true,
         // absolute_offset: false,
         ..Default::default()
     };

@@ -38,7 +38,7 @@ fn main() {
     trainer.learning_momentum = 0.01;
     trainer.train(
-        NeuraBackprop::new(Euclidean),
+        &NeuraBackprop::new(Euclidean),
         &mut network,
         cycle_shuffling(inputs.iter().cloned(), rand::thread_rng()),
         &inputs,

@@ -1,10 +1,9 @@
 use num::ToPrimitive;
-use crate::{network::NeuraTrainableNetworkBase, derivable::NeuraLoss, layer::NeuraTrainableLayer};
+use crate::{derivable::NeuraLoss, layer::NeuraTrainableLayer, network::NeuraTrainableNetworkBase};
 use super::*;
 pub struct NeuraBackprop<Loss> {
     loss: Loss,
 }

@@ -0,0 +1,191 @@
use nalgebra::{DVector, Scalar};
use num::{traits::NumAssignOps, Float, ToPrimitive};

use crate::derivable::NeuraDerivable;

use super::*;

// TODO: add `max_depth: usize`
pub struct NeuraForwardForward<Act> {
    threshold: f64,
    activation: Act,
}

impl<Act: Clone + NeuraDerivable<f64>> NeuraForwardForward<Act> {
    pub fn new(activation: Act, threshold: f64) -> Self {
        Self {
            threshold,
            activation,
        }
    }
}

struct NeuraForwardPair<Act> {
    threshold: f64,
    maximize: bool,
    activation: Act,
}

impl<
        F,
        Act: Clone + NeuraDerivable<f64>,
        Input,
        Trainable: NeuraTrainableNetwork<Input, NeuraForwardPair<Act>, Output = DVector<F>>,
    > NeuraGradientSolver<Input, bool, Trainable> for NeuraForwardForward<Act>
where
    F: ToPrimitive,
{
    fn get_gradient(
        &self,
        trainable: &Trainable,
        input: &Input,
        target: &bool,
    ) -> Trainable::Gradient {
        let target = *target;

        trainable.traverse(
            input,
            &NeuraForwardPair {
                threshold: self.threshold,
                maximize: target,
                activation: self.activation.clone(),
            },
        )
    }

    fn score(&self, trainable: &Trainable, input: &Input, target: &bool) -> f64 {
        let output = trainable.eval(input);
        let goodness = output
            .iter()
            .map(|x| x.to_f64().unwrap())
            .reduce(|acc, x| acc + x * x)
            .unwrap_or(0.0);
        let goodness = goodness - self.threshold;
        let goodness = self.activation.eval(goodness);

        // Try to normalize the goodness so that if `Act([-threshold; +inf]) = [α; 1]`, `goodness ∈ [0; 1]`
        let activation_zero = self.activation.eval(-self.threshold);
        const EPSILON: f64 = 0.01;
        let goodness = if activation_zero < 1.0 - EPSILON {
            (goodness - activation_zero) / (1.0 - activation_zero)
        } else {
            goodness
        };

        if *target {
            1.0 - goodness
        } else {
            goodness
        }
    }
}

impl<Act> NeuraGradientSolverBase for NeuraForwardPair<Act> {
    type Output<NetworkInput, NetworkGradient> = NetworkGradient;
}

impl<Act, LayerOutput> NeuraGradientSolverFinal<LayerOutput> for NeuraForwardPair<Act> {
    fn eval_final(&self, _output: LayerOutput) -> Self::Output<LayerOutput, ()> {
        ()
    }
}

impl<F: Float + Scalar + NumAssignOps, Act: NeuraDerivable<F>>
    NeuraGradientSolverTransient<DVector<F>> for NeuraForwardPair<Act>
{
    fn eval_layer<
        Input,
        NetworkGradient,
        RecGradient,
        Layer: NeuraTrainableLayer<Input, Output = DVector<F>>,
    >(
        &self,
        layer: &Layer,
        input: &Input,
        rec_gradient: RecGradient,
        combine_gradients: impl Fn(Layer::Gradient, RecGradient) -> NetworkGradient,
    ) -> Self::Output<Input, NetworkGradient> {
        let output = layer.eval(input);
        let goodness = output
            .iter()
            .copied()
            .reduce(|acc, x| acc + x * x)
            .unwrap_or(F::zero());
        let goodness = if self.maximize {
            goodness - F::from(self.threshold).unwrap()
        } else {
            F::from(self.threshold).unwrap() - goodness
        };
        // We skip self.activation.eval(goodness)

        let two = F::from(2.0).unwrap();
        // The original formula does not have a 1/2 term,
        // so we must multiply by 2
        let mut goodness_derivative = output * (two * self.activation.derivate(goodness));

        if self.maximize {
            goodness_derivative = -goodness_derivative;
        }

        // TODO: split backprop_layer into eval_training, get_gradient and get_backprop
        let (_, layer_gradient) = layer.backprop_layer(input, goodness_derivative);

        combine_gradients(layer_gradient, rec_gradient)
    }
}

#[cfg(test)]
mod test {
    use rand::Rng;

    use super::*;
    use crate::{
        derivable::activation::{Relu, Tanh},
        prelude::*,
        utils::uniform_vector,
    };

    #[test]
    fn test_forward_forward() {
        let network = neura_sequential![
            neura_layer!("dense", 10).activation(Relu),
            neura_layer!("normalize"),
            neura_layer!("dense", 4).activation(Relu),
            neura_layer!("normalize"),
            neura_layer!("dense", 1),
        ]
        .construct(NeuraShape::Vector(10))
        .unwrap();

        let fwd_solver = NeuraForwardForward::new(Tanh, 0.25);

        fwd_solver.get_gradient(&network, &uniform_vector(10).map(|x| x as f32), &true);
    }

    #[test]
    fn test_forward_forward_train() {
        let mut network = neura_sequential![
            neura_layer!("dense", 5).activation(Relu),
            neura_layer!("normalize"),
            neura_layer!("dense", 3).activation(Relu),
            neura_layer!("normalize"),
            neura_layer!("dense", 1)
        ]
        .construct(NeuraShape::Vector(4))
        .unwrap();

        let solver = NeuraForwardForward::new(Tanh, 0.25);
        let trainer = NeuraBatchedTrainer::new(0.01, 20);

        let inputs = (0..1).cycle().map(|_| {
            let mut rng = rand::thread_rng();
            (uniform_vector(4).map(|x| x as f32), rng.gen_bool(0.5))
        });
        let test_inputs: Vec<_> = inputs.clone().take(10).collect();

        trainer.train(&solver, &mut network, inputs, test_inputs.as_slice());
    }
}
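For reference, the per-layer objective that `eval_layer` works with above is the sum of squared activations compared against the threshold, and its derivative with respect to each activation is `2 * y_i`, negated when goodness should be maximized (the trainer minimizes). A minimal standalone sketch of that computation, omitting the `activation.derivate(goodness)` factor the solver also multiplies in (`goodness_and_derivative` is a hypothetical helper, not part of this commit):

use nalgebra::DVector;

// Sum-of-squares goodness, shifted by the threshold, together with its
// gradient with respect to the layer output.
fn goodness_and_derivative(
    output: &DVector<f64>,
    threshold: f64,
    maximize: bool,
) -> (f64, DVector<f64>) {
    let sum_squares: f64 = output.map(|y| y * y).sum();
    let goodness = if maximize {
        sum_squares - threshold
    } else {
        threshold - sum_squares
    };
    // d(sum_i y_i^2)/dy_i = 2 * y_i; flip the sign to *maximize* goodness.
    let mut derivative = output * 2.0;
    if maximize {
        derivative = -derivative;
    }
    (goodness, derivative)
}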

@@ -1,6 +1,9 @@
 mod backprop;
 pub use backprop::NeuraBackprop;
+mod forward_forward;
+pub use forward_forward::NeuraForwardForward;
 use crate::{
     layer::NeuraTrainableLayer,
     network::{NeuraTrainableNetwork, NeuraTrainableNetworkBase},

@@ -2,6 +2,7 @@ use crate::algebra::NeuraVectorSpace;
 pub mod dense;
 pub mod dropout;
+pub mod normalize;
 pub mod softmax;
 #[derive(Clone, Copy, PartialEq, Debug)]
@@ -124,4 +125,8 @@ macro_rules! neura_layer {
     ( "softmax" ) => {
         $crate::layer::softmax::NeuraSoftmaxLayer::new()
     };
+    ( "normalize" ) => {
+        $crate::layer::normalize::NeuraNormalizeLayer::new()
+    };
 }

@@ -0,0 +1,116 @@
use nalgebra::{DVector, Scalar};
use num::{traits::NumAssignOps, Float};

use super::*;

/// A layer that normalizes and centers its input, as follows:
///
/// ```no_rust
/// μ = sum_i(x_i) / n
/// σ² = sum_i((x_i - μ)^2) / n
/// y_i = (x_i - μ) / σ
/// ```
#[derive(Debug, Clone, Copy)]
pub struct NeuraNormalizeLayer {
    shape: NeuraShape,
}

impl NeuraNormalizeLayer {
    pub fn new() -> Self {
        Self {
            shape: NeuraShape::Vector(0),
        }
    }
}

impl NeuraPartialLayer for NeuraNormalizeLayer {
    type Constructed = NeuraNormalizeLayer;
    type Err = ();

    fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
        Ok(Self { shape: input_shape })
    }

    fn output_shape(constructed: &Self::Constructed) -> NeuraShape {
        constructed.shape
    }
}

impl<F: Float + Scalar> NeuraLayer<DVector<F>> for NeuraNormalizeLayer {
    type Output = DVector<F>;

    fn eval(&self, input: &DVector<F>) -> Self::Output {
        let (mean, variance, _) = mean_variance(input);
        let stddev = F::sqrt(variance);

        let mut output = input.clone();
        for item in &mut output {
            *item = (*item - mean) / stddev;
        }

        output
    }
}

impl<F: Float + Scalar + NumAssignOps> NeuraTrainableLayer<DVector<F>> for NeuraNormalizeLayer {
    type Gradient = ();

    fn backprop_layer(
        &self,
        input: &DVector<F>,
        epsilon: Self::Output,
    ) -> (DVector<F>, Self::Gradient) {
        let (mean, variance, len) = mean_variance(input);
        let stddev = F::sqrt(variance);
        let input_centered = input.clone().map(|x| x - mean);

        let mut jacobian_partial = &input_centered * input_centered.transpose();
        jacobian_partial /= -variance * (stddev * len);
        // Apply the 1/σ * dμ/dx_i term
        for value in jacobian_partial.iter_mut() {
            *value += F::one() / (stddev * len);
        }

        let mut epsilon_out = jacobian_partial * &epsilon;
        // Apply the δ_{ik}/σ term
        for i in 0..epsilon_out.len() {
            epsilon_out[i] += epsilon[i] / stddev;
        }

        (epsilon_out, ())
    }

    fn default_gradient(&self) -> Self::Gradient {
        ()
    }

    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }

    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
        // Noop
    }
}

fn mean_variance<'a, F: Float + Scalar>(input: impl IntoIterator<Item = &'a F>) -> (F, F, F) {
    // Quickly compute mean and variance in one pass
    let mut count = 0;
    let mut sum = F::zero();
    let mut sum_squared = F::zero();

    for &value in input.into_iter() {
        count += 1;
        sum = sum + value;
        sum_squared = sum_squared + value * value;
    }

    let len = F::from(count).unwrap();
    let mean = sum / len;
    let variance = sum_squared / len - mean * mean;

    (mean, variance, len)
}
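The layer computes mean and variance in a single pass via E[x²] − E[x]² and then divides the centered values by the standard deviation. A minimal f64-only sketch of the same forward pass, for illustration only (the `normalize` function below is hypothetical and not part of this commit):

// One-pass mean/variance, then standardize each element.
fn normalize(input: &[f64]) -> Vec<f64> {
    let n = input.len() as f64;
    let sum: f64 = input.iter().sum();
    let sum_squared: f64 = input.iter().map(|x| x * x).sum();
    let mean = sum / n;
    let variance = sum_squared / n - mean * mean;
    let stddev = variance.sqrt();
    input.iter().map(|x| (x - mean) / stddev).collect()
}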

@@ -3,9 +3,9 @@
 pub mod algebra;
 pub mod derivable;
+pub mod gradient_solver;
 pub mod layer;
 pub mod network;
-pub mod gradient_solver;
 pub mod train;
 mod utils;
@@ -18,10 +18,10 @@ pub mod prelude {
     pub use crate::{neura_layer, neura_sequential};
     // Structs and traits
+    pub use crate::gradient_solver::NeuraBackprop;
     pub use crate::layer::*;
     pub use crate::network::sequential::{
         NeuraSequential, NeuraSequentialConstruct, NeuraSequentialTail,
     };
-    pub use crate::gradient_solver::NeuraBackprop;
     pub use crate::train::NeuraBatchedTrainer;
 }

@@ -1,4 +1,6 @@
-use crate::{algebra::NeuraVectorSpace, layer::NeuraLayer, gradient_solver::NeuraGradientSolverBase};
+use crate::{
+    algebra::NeuraVectorSpace, gradient_solver::NeuraGradientSolverBase, layer::NeuraLayer,
+};
 pub mod sequential;

@@ -1,7 +1,7 @@
 use super::{NeuraTrainableNetwork, NeuraTrainableNetworkBase};
 use crate::{
-    layer::{NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayer},
     gradient_solver::{NeuraGradientSolverFinal, NeuraGradientSolverTransient},
+    layer::{NeuraLayer, NeuraPartialLayer, NeuraShape, NeuraTrainableLayer},
 };
 mod construct;
@@ -212,8 +212,8 @@ where
     }
 }
-impl<Input: Clone, Optimizer: NeuraGradientSolverFinal<Input>> NeuraTrainableNetwork<Input, Optimizer>
-    for ()
+impl<Input: Clone, Optimizer: NeuraGradientSolverFinal<Input>>
+    NeuraTrainableNetwork<Input, Optimizer> for ()
 {
     fn traverse(
         &self,

@@ -1,5 +1,6 @@
 use crate::{
-    algebra::NeuraVectorSpace, network::NeuraTrainableNetworkBase, gradient_solver::NeuraGradientSolver,
+    algebra::NeuraVectorSpace, gradient_solver::NeuraGradientSolver,
+    network::NeuraTrainableNetworkBase,
 };
 #[non_exhaustive]
@@ -77,7 +78,7 @@ impl NeuraBatchedTrainer {
         Inputs: IntoIterator<Item = (Input, Target)>,
     >(
         &self,
-        gradient_solver: GradientSolver,
+        gradient_solver: &GradientSolver,
         network: &mut Network,
         inputs: Inputs,
         test_inputs: &[(Input, Target)],
@@ -140,10 +141,10 @@ mod test {
     use crate::{
         assert_approx,
         derivable::{activation::Linear, loss::Euclidean, regularize::NeuraL0, NeuraLoss},
+        gradient_solver::NeuraBackprop,
         layer::{dense::NeuraDenseLayer, NeuraLayer},
         network::sequential::{NeuraSequential, NeuraSequentialTail},
         neura_sequential,
-        gradient_solver::NeuraBackprop,
     };
     #[test]
