From 6c1d6874d7a7e1d40694141ad207b4923b34c73a Mon Sep 17 00:00:00 2001 From: Adrien Burgun Date: Sun, 16 Apr 2023 12:38:18 +0200 Subject: [PATCH] :recycle: Implement and transition to NeuraMatrix and NeuraVector, to prevent stack overflows --- .gitignore | 1 + Cargo.toml | 3 + examples/bivariate.rs | 9 +- examples/convolution.rs | 77 ++++++++ examples/xor.rs | 15 +- src/algebra/matrix.rs | 292 +++++++++++++++++++++++++++++ src/{algebra.rs => algebra/mod.rs} | 24 +++ src/algebra/vector.rs | 278 +++++++++++++++++++++++++++ src/derivable/loss.rs | 22 ++- src/layer/dense.rs | 55 ++++-- src/layer/dropout.rs | 14 +- src/layer/mod.rs | 22 +++ src/layer/one_hot.rs | 26 ++- src/layer/reshape.rs | 147 +++++++++++++++ src/layer/softmax.rs | 38 ++-- src/lib.rs | 5 +- src/network/sequential.rs | 32 ++-- src/train.rs | 54 ++++-- src/utils.rs | 105 +---------- 19 files changed, 1006 insertions(+), 213 deletions(-) create mode 100644 examples/convolution.rs create mode 100644 src/algebra/matrix.rs rename src/{algebra.rs => algebra/mod.rs} (84%) create mode 100644 src/algebra/vector.rs create mode 100644 src/layer/reshape.rs diff --git a/.gitignore b/.gitignore index 4fffb2f..d461ead 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target /Cargo.lock +/data diff --git a/Cargo.toml b/Cargo.toml index d245660..48cafe8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,9 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +boxed-array = "0.1.0" ndarray = "^0.15" +num = "^0.4" # num-traits = "0.2.15" rand = "^0.8" rand_distr = "0.4.3" @@ -14,3 +16,4 @@ rand_distr = "0.4.3" [dev-dependencies] image = "0.24.6" viuer = "0.6.2" +rust-mnist = "0.2.0" diff --git a/examples/bivariate.rs b/examples/bivariate.rs index 4b6e0bf..e03744f 100644 --- a/examples/bivariate.rs +++ b/examples/bivariate.rs @@ -31,7 +31,7 @@ fn main() { (angle.cos() * radius, angle.sin() * radius) }; - ([x, y], neuramethyst::one_hot::<2>(category)) + ([x, y].into(), neuramethyst::one_hot::<2>(category)) }); let test_inputs: Vec<_> = inputs.clone().take(10).collect(); @@ -49,7 +49,10 @@ fn main() { ); let network = network.clone(); - draw_neuron_activation(|input| network.eval(&input).into_iter().collect(), 6.0); + draw_neuron_activation( + |input| network.eval(&input.into()).into_iter().collect(), + 6.0, + ); println!("{}", epoch); std::thread::sleep(std::time::Duration::new(0, 50_000_000)); @@ -72,7 +75,7 @@ fn main() { let mut file = std::fs::File::create("target/bivariate.csv").unwrap(); for (input, _target) in test_inputs { - let guess = neuramethyst::argmax(&network.eval(&input)); + let guess = neuramethyst::argmax(network.eval(&input).as_ref()); writeln!(&mut file, "{},{},{}", input[0], input[1], guess).unwrap(); } } diff --git a/examples/convolution.rs b/examples/convolution.rs new file mode 100644 index 0000000..8cbeeaa --- /dev/null +++ b/examples/convolution.rs @@ -0,0 +1,77 @@ +#![feature(generic_arg_infer)] +// #![feature(generic_const_exprs)] + +use neuramethyst::algebra::NeuraVector; +use rust_mnist::Mnist; + +use neuramethyst::derivable::activation::{Linear, Relu}; +use neuramethyst::derivable::loss::CrossEntropy; +use neuramethyst::{cycle_shuffling, one_hot, prelude::*}; + +fn main() { + const TRAIN_SIZE: usize = 100; + + let Mnist { + train_data: train_images, + train_labels, + test_data: test_images, + test_labels, + .. 
+ } = Mnist::new("data/"); + + let train_images = train_images + .into_iter() + .map(|raw| { + raw.into_iter() + .map(|x| x as f64 / 255.0) + .collect::>() + }) + .take(TRAIN_SIZE); + let train_labels = train_labels + .into_iter() + .map(|x| one_hot::<10>(x as usize)) + .take(TRAIN_SIZE); + + let test_images = test_images + .into_iter() + .map(|raw| { + raw.into_iter() + .map(|x| x as f64 / 255.0) + .collect::>() + }) + .take(TRAIN_SIZE / 6); + let test_labels = test_labels + .into_iter() + .map(|x| one_hot::<10>(x as usize)) + .take(TRAIN_SIZE / 6); + + let train_iter = cycle_shuffling( + train_images.zip(train_labels.into_iter()), + rand::thread_rng(), + ); + + let test_inputs: Vec<_> = test_images.zip(test_labels.into_iter()).collect(); + + let mut network = neura_sequential![ + neura_layer!("dense", { 28 * 28 }, 200; Relu), + neura_layer!("dropout", 0.5), + neura_layer!("dense", 100; Relu), + neura_layer!("dropout", 0.5), + neura_layer!("dense", 30; Relu), + neura_layer!("dropout", 0.5), + neura_layer!("dense", 10; Linear), + neura_layer!("softmax") + ]; + + let mut trainer = NeuraBatchedTrainer::new(0.03, TRAIN_SIZE * 10); + trainer.log_iterations = (TRAIN_SIZE / 128).max(1); + trainer.batch_size = 128; + trainer.learning_momentum = 0.001; + + trainer.train( + NeuraBackprop::new(CrossEntropy), + &mut network, + train_iter, + &test_inputs, + ); +} diff --git a/examples/xor.rs b/examples/xor.rs index b1e35d6..759dd9d 100644 --- a/examples/xor.rs +++ b/examples/xor.rs @@ -1,8 +1,9 @@ #![feature(generic_arg_infer)] +use neuramethyst::algebra::NeuraVector; use neuramethyst::derivable::activation::Relu; use neuramethyst::derivable::loss::Euclidean; -use neuramethyst::prelude::*; +use neuramethyst::{cycle_shuffling, prelude::*}; fn main() { let mut network = neura_sequential![ @@ -11,14 +12,14 @@ fn main() { neura_layer!("dense", 1; Relu) ]; - let inputs = [ - ([0.0, 0.0], [0.0]), - ([0.0, 1.0], [1.0]), - ([1.0, 0.0], [1.0]), - ([1.0, 1.0], [0.0]), + let inputs: [(NeuraVector<2, f64>, NeuraVector<1, f64>); 4] = [ + ([0.0, 0.0].into(), [0.0].into()), + ([0.0, 1.0].into(), [1.0].into()), + ([1.0, 0.0].into(), [1.0].into()), + ([1.0, 1.0].into(), [0.0].into()), ]; - for (input, target) in inputs { + for (input, target) in &inputs { println!( "Input: {:?}, target: {}, actual: {:.3}", &input, diff --git a/src/algebra/matrix.rs b/src/algebra/matrix.rs new file mode 100644 index 0000000..fbe7a3c --- /dev/null +++ b/src/algebra/matrix.rs @@ -0,0 +1,292 @@ +use std::borrow::Borrow; + +use super::*; +use boxed_array::from_cloned; +use num::Float; + +/// A simple abstraction around `[[F; WIDTH]; HEIGHT]`, +/// which ensures that all allocations that depend on `WIDTH` or `HEIGHT` are done on the heap, +/// without losing the length information. 
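+///
+/// A minimal usage sketch (it mirrors the indexing test at the bottom of this file and
+/// assumes the `neuramethyst::algebra` re-export added in `src/algebra/mod.rs` below):
+///
+/// ```
+/// use neuramethyst::algebra::NeuraMatrix;
+///
+/// // The 1000-by-1000 backing array lives on the heap, so this does not overflow the stack.
+/// let mut matrix: NeuraMatrix<1000, 1000, f64> = NeuraMatrix::from_value(0.0);
+/// matrix[100][200] = 0.3;              // index by [row][column]...
+/// assert_eq!(matrix[(200, 100)], 0.3); // ...or by an `(x, y)` tuple
+/// ```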
+#[derive(Clone, Debug, PartialEq)] +pub struct NeuraMatrix { + pub data: Box<[[F; WIDTH]; HEIGHT]>, +} + +impl NeuraMatrix { + #[inline(always)] + pub fn from_value(value: F) -> Self + where + F: Clone, + { + Self { + data: from_cloned(&value), + } + } + + #[inline(always)] + pub fn get(&self, x: usize, y: usize) -> Option<&F> { + if x >= WIDTH || y >= HEIGHT { + return None; + } + + Some(&self.data[y][x]) + } +} + +impl NeuraMatrix { + /// Returns `self * vector` + pub fn multiply_vector(&self, vector: impl Borrow<[F; WIDTH]>) -> NeuraVector { + let mut result: NeuraVector = NeuraVector::from_value(F::zero()); + let vector = vector.borrow(); + + for i in 0..HEIGHT { + let mut sum = F::zero(); + for k in 0..WIDTH { + sum = sum + self.data[i][k] * vector[k]; + } + result[i] = sum; + } + + result + } + + /// Returns `transpose(self) * vector`, + /// without actually performing the transpose operation + pub fn transpose_multiply_vector( + &self, + vector: impl AsRef<[F; HEIGHT]>, + ) -> NeuraVector { + let mut result: NeuraVector = NeuraVector::from_value(F::zero()); + let vector = vector.as_ref(); + + for j in 0..WIDTH { + let mut sum = F::zero(); + for k in 0..HEIGHT { + sum = sum + self.data[k][j] * vector[k]; + } + result[j] = sum; + } + + result + } +} + +impl NeuraMatrix { + pub fn from_diagonal(vector: impl AsRef<[F; LENGTH]>) -> Self { + let mut result: NeuraMatrix = NeuraMatrix::default(); + let vector = vector.as_ref(); + + for i in 0..LENGTH { + result[i][i] = vector[i].clone(); + } + + result + } +} + +impl + Into> NeuraVectorSpace + for NeuraMatrix +{ + fn add_assign(&mut self, other: &Self) { + for i in 0..HEIGHT { + for j in 0..WIDTH { + self.data[i][j] = self.data[i][j] + other.data[i][j]; + } + } + } + + fn mul_assign(&mut self, by: f64) { + let by: F = by.into(); + for i in 0..HEIGHT { + for j in 0..WIDTH { + self.data[i][j] = self.data[i][j] * by; + } + } + } + + #[inline(always)] + fn zero() -> Self { + Self::from_value(F::zero()) + } + + fn norm_squared(&self) -> f64 { + let mut sum = F::zero(); + + for i in 0..HEIGHT { + for j in 0..WIDTH { + let x = self.data[i][j]; + sum = sum + x * x; + } + } + + sum.into() + } +} + +impl From> + for NeuraMatrix +{ + #[inline] + fn from(data: Box<[[F; WIDTH]; HEIGHT]>) -> Self { + Self { data } + } +} + +impl From> + for Box<[[F; WIDTH]; HEIGHT]> +{ + #[inline] + fn from(matrix: NeuraMatrix) -> Self { + matrix.data + } +} + +impl From<&[[F; WIDTH]; HEIGHT]> + for NeuraMatrix +{ + /// **Warning:** when using this function, make sure that the array is not allocated on the stack + /// or that `WIDTH` and `HEIGHT` are bounded. + #[inline] + fn from(data: &[[F; WIDTH]; HEIGHT]) -> Self { + let mut res = Self::default(); + + for i in 0..HEIGHT { + for j in 0..WIDTH { + res[i][j] = data[i][j].clone(); + } + } + + res + } +} + +impl From<[[F; WIDTH]; HEIGHT]> + for NeuraMatrix +{ + /// **Warning:** when using this function, make sure that `WIDTH` and `HEIGHT` are bounded. 
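+    /// (The `[[F; WIDTH]; HEIGHT]` argument is passed by value, i.e. through the stack, before
+    /// being boxed here, so unbounded dimensions can trigger the very stack overflows this type
+    /// is meant to prevent.)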
+ fn from(data: [[F; WIDTH]; HEIGHT]) -> Self { + Self { + data: Box::new(data), + } + } +} + +impl std::ops::Index<(usize, usize)> + for NeuraMatrix +{ + type Output = F; + + #[inline] + fn index(&self, index: (usize, usize)) -> &Self::Output { + if index.0 >= WIDTH || index.1 >= HEIGHT { + panic!( + "Index out of bound: tried indexing matrix element ({}, {}), which is outside of NeuraMatrix<{}, {}, _>", + index.0, index.1, WIDTH, HEIGHT + ); + } + + &self.data[index.1][index.0] + } +} + +impl std::ops::IndexMut<(usize, usize)> + for NeuraMatrix +{ + #[inline] + fn index_mut(&mut self, index: (usize, usize)) -> &mut Self::Output { + if index.0 >= WIDTH || index.1 >= HEIGHT { + panic!( + "Index out of bound: tried indexing matrix element ({}, {}), which is outside of NeuraMatrix<{}, {}, _>", + index.0, index.1, WIDTH, HEIGHT + ); + } + + &mut self.data[index.1][index.0] + } +} + +impl std::ops::Index + for NeuraMatrix +{ + type Output = [F; WIDTH]; + + #[inline(always)] + fn index(&self, index: usize) -> &Self::Output { + if index >= HEIGHT { + panic!( + "Index out of bound: tried indexing matrix row {}, which is outside of NeuraMatrix<{}, {}, _>", + index, WIDTH, HEIGHT + ); + } + + &self.data[index] + } +} + +impl std::ops::IndexMut + for NeuraMatrix +{ + #[inline(always)] + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + if index >= HEIGHT { + panic!( + "Index out of bound: tried indexing matrix row {}, which is outside of NeuraMatrix<{}, {}, _>", + index, WIDTH, HEIGHT + ); + } + + &mut self.data[index] + } +} + +impl AsRef<[[F; WIDTH]; HEIGHT]> + for NeuraMatrix +{ + #[inline(always)] + fn as_ref(&self) -> &[[F; WIDTH]; HEIGHT] { + &self.data + } +} + +impl Borrow<[[F; WIDTH]; HEIGHT]> + for NeuraMatrix +{ + #[inline(always)] + fn borrow(&self) -> &[[F; WIDTH]; HEIGHT] { + &self.data + } +} + +impl Default + for NeuraMatrix +{ + #[inline(always)] + fn default() -> Self { + Self::from_value(F::default()) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_index() { + let mut matrix: NeuraMatrix<1000, 1000, f64> = NeuraMatrix::from_value(0.0); + + matrix[100][200] = 0.3; + assert_eq!(matrix[(200, 100)], 0.3); + matrix[(999, 999)] = 0.5; + assert_eq!(matrix[999][999], 0.5); + } + + #[test] + #[should_panic( + expected = "Index out of bound: tried indexing matrix row 100, which is outside of NeuraMatrix<100, 100, _>" + )] + fn test_index_oob() { + let matrix: NeuraMatrix<100, 100, f64> = NeuraMatrix::from_value(0.0); + + let _ = matrix[100]; + } +} diff --git a/src/algebra.rs b/src/algebra/mod.rs similarity index 84% rename from src/algebra.rs rename to src/algebra/mod.rs index fffafe5..ec9e3ad 100644 --- a/src/algebra.rs +++ b/src/algebra/mod.rs @@ -1,3 +1,9 @@ +mod matrix; +pub use matrix::NeuraMatrix; + +mod vector; +pub use vector::NeuraVector; + /// An extension of `std::ops::AddAssign` and `std::ops::Default` pub trait NeuraVectorSpace { fn add_assign(&mut self, other: &Self); @@ -30,6 +36,24 @@ impl NeuraVectorSpace for () { } } +impl NeuraVectorSpace for Box { + fn add_assign(&mut self, other: &Self) { + self.as_mut().add_assign(other.as_ref()); + } + + fn mul_assign(&mut self, by: f64) { + self.as_mut().mul_assign(by); + } + + fn zero() -> Self { + Box::new(T::zero()) + } + + fn norm_squared(&self) -> f64 { + self.as_ref().norm_squared() + } +} + impl NeuraVectorSpace for (Left, Right) { fn add_assign(&mut self, other: &Self) { NeuraVectorSpace::add_assign(&mut self.0, &other.0); diff --git a/src/algebra/vector.rs b/src/algebra/vector.rs new 
file mode 100644 index 0000000..f5e637e --- /dev/null +++ b/src/algebra/vector.rs @@ -0,0 +1,278 @@ +use std::borrow::Borrow; + +use super::*; +use boxed_array::from_cloned; +use num::Float; + +#[derive(Clone, Debug, PartialEq)] +pub struct NeuraVector { + pub data: Box<[F; LENGTH]>, +} + +impl NeuraVector { + #[inline(always)] + pub fn from_value(value: F) -> Self + where + F: Clone, + { + Self { + data: from_cloned(&value), + } + } + + #[inline(always)] + pub fn get(&self, index: usize) -> Option<&F> { + if index >= LENGTH { + None + } else { + Some(&self.data[index]) + } + } + + #[inline(always)] + pub fn len(&self) -> usize { + LENGTH + } + + pub fn iter<'a>(&'a self) -> std::slice::Iter<'a, F> { + self.data.iter() + } +} + +impl NeuraVector { + pub fn dot(&self, other: impl AsRef<[F; LENGTH]>) -> F { + let mut sum = F::zero(); + let other = other.as_ref(); + + for i in 0..LENGTH { + sum = sum + self.data[i] * other[i]; + } + + sum + } + + /// Returns $left^{\top} \cdot right$, ie. $\ket{left} \bra{right}$ + pub fn reverse_dot( + &self, + other: impl Borrow<[F; WIDTH]>, + ) -> NeuraMatrix { + let mut result: NeuraMatrix = NeuraMatrix::from_value(F::zero()); + let other = other.borrow(); + + for i in 0..LENGTH { + for j in 0..WIDTH { + result[i][j] = self.data[i] * other[j]; + } + } + + result + } + + pub fn hadamard_product(&self, other: impl AsRef<[F; LENGTH]>) -> NeuraVector { + let mut result: NeuraVector = NeuraVector::from_value(F::zero()); + let other = other.as_ref(); + + for i in 0..LENGTH { + result[i] = self.data[i] * other[i]; + } + + result + } +} + +impl + Into> NeuraVectorSpace + for NeuraVector +{ + fn add_assign(&mut self, other: &Self) { + for i in 0..LENGTH { + self.data[i] = self.data[i] + other.data[i]; + } + } + + fn mul_assign(&mut self, by: f64) { + for i in 0..LENGTH { + self.data[i] = self.data[i] * by.into(); + } + } + + #[inline(always)] + fn zero() -> Self { + Self::from_value(F::zero()) + } + + fn norm_squared(&self) -> f64 { + let mut sum = F::zero(); + + for i in 0..LENGTH { + sum = sum + self.data[i] * self.data[i]; + } + + sum.into() + } +} + +impl std::ops::Index for NeuraVector { + type Output = F; + + #[inline(always)] + fn index(&self, index: usize) -> &Self::Output { + if index >= LENGTH { + panic!( + "Tried indexing element {} of NeuraVector<{}, _>", + index, LENGTH + ); + } + + &self.data[index] + } +} + +impl std::ops::IndexMut for NeuraVector { + #[inline(always)] + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + if index >= LENGTH { + panic!( + "Tried indexing element {} of NeuraVector<{}, _>", + index, LENGTH + ); + } + + &mut self.data[index] + } +} + +impl AsRef<[F; LENGTH]> for NeuraVector { + #[inline(always)] + fn as_ref(&self) -> &[F; LENGTH] { + &self.data + } +} + +impl AsRef<[F]> for NeuraVector { + #[inline(always)] + fn as_ref(&self) -> &[F] { + self.data.as_ref() + } +} + +impl Borrow<[F; LENGTH]> for NeuraVector { + #[inline(always)] + fn borrow(&self) -> &[F; LENGTH] { + &self.data + } +} + +impl Borrow<[F; LENGTH]> for &NeuraVector { + #[inline(always)] + fn borrow(&self) -> &[F; LENGTH] { + &self.data + } +} + +impl From> for NeuraVector { + fn from(data: Box<[F; LENGTH]>) -> Self { + Self { data } + } +} + +impl From> for Box<[F; LENGTH]> { + fn from(vector: NeuraVector) -> Self { + vector.data + } +} + +impl From<&[F; LENGTH]> for NeuraVector { + /// **Warning:** when using this function, make sure that the array is not allocated on the stack, + /// or that `LENGTH` is bounded. 
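+    /// (The referenced array is copied element by element into the heap-allocated `NeuraVector`,
+    /// but the source array itself still has to fit wherever it was originally allocated.)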
+ fn from(data: &[F; LENGTH]) -> Self { + let mut res = Self::default(); + + for i in 0..LENGTH { + res.data[i] = data[i].clone(); + } + + res + } +} + +impl From<[F; LENGTH]> for NeuraVector { + /// **Warning:** when using this function, make sure that `LENGTH` is bounded. + fn from(data: [F; LENGTH]) -> Self { + Self { + data: Box::new(data), + } + } +} + +impl Default for NeuraVector { + #[inline(always)] + fn default() -> Self { + Self::from_value(F::default()) + } +} + +impl IntoIterator for NeuraVector { + type Item = F; + type IntoIter = std::array::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.data.into_iter() + } +} + +impl<'a, const LENGTH: usize, F> IntoIterator for &'a NeuraVector { + type Item = &'a F; + type IntoIter = std::slice::Iter<'a, F>; + + fn into_iter(self) -> Self::IntoIter { + self.data.iter() + } +} + +impl<'a, const LENGTH: usize, F> IntoIterator for &'a mut NeuraVector { + type Item = &'a mut F; + type IntoIter = std::slice::IterMut<'a, F>; + + fn into_iter(self) -> Self::IntoIter { + self.data.iter_mut() + } +} + +impl<'a, const LENGTH: usize, F: Default + Clone> FromIterator for NeuraVector { + fn from_iter>(iter: T) -> Self { + let mut res = Self::default(); + let mut iter = iter.into_iter(); + + for i in 0..LENGTH { + if let Some(next) = iter.next() { + res[i] = next; + } else { + break; + } + } + + res + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_reverse_dot() { + let left: NeuraVector<_, f64> = [2.0, 3.0, 5.0].into(); + let right: NeuraVector<_, f64> = [7.0, 11.0, 13.0, 17.0].into(); + + let expected: NeuraMatrix<_, _, f64> = [ + [14.0, 22.0, 26.0, 34.0], + [21.0, 33.0, 39.0, 51.0], + [35.0, 55.0, 65.0, 85.0], + ] + .into(); + + let actual = left.reverse_dot(right); + + assert_eq!(expected, actual); + } +} diff --git a/src/derivable/loss.rs b/src/derivable/loss.rs index 301ea08..7d23d8b 100644 --- a/src/derivable/loss.rs +++ b/src/derivable/loss.rs @@ -1,14 +1,16 @@ +use crate::algebra::NeuraVector; + use super::NeuraLoss; #[derive(Clone, Copy, Debug, PartialEq)] pub struct Euclidean; impl NeuraLoss for Euclidean { - type Input = [f64; N]; - type Target = [f64; N]; + type Input = NeuraVector; + type Target = NeuraVector; #[inline] - fn eval(&self, target: &[f64; N], actual: &[f64; N]) -> f64 { + fn eval(&self, target: &NeuraVector, actual: &NeuraVector) -> f64 { let mut sum_squared = 0.0; for i in 0..N { @@ -19,8 +21,12 @@ impl NeuraLoss for Euclidean { } #[inline] - fn nabla(&self, target: &[f64; N], actual: &[f64; N]) -> [f64; N] { - let mut res = [0.0; N]; + fn nabla( + &self, + target: &NeuraVector, + actual: &NeuraVector, + ) -> NeuraVector { + let mut res = NeuraVector::default(); // ∂E(y)/∂yᵢ = yᵢ - yᵢ' for i in 0..N { @@ -57,8 +63,8 @@ impl CrossEntropy { } impl NeuraLoss for CrossEntropy { - type Input = [f64; N]; - type Target = [f64; N]; + type Input = NeuraVector; + type Target = NeuraVector; fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64 { let mut result = 0.0; @@ -71,7 +77,7 @@ impl NeuraLoss for CrossEntropy { } fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input { - let mut result = [0.0; N]; + let mut result = NeuraVector::default(); for i in 0..N { result[i] = self.derivate_single(target[i], actual[i]); diff --git a/src/layer/dense.rs b/src/layer/dense.rs index e1be4f0..ff921a8 100644 --- a/src/layer/dense.rs +++ b/src/layer/dense.rs @@ -1,8 +1,7 @@ use super::{NeuraLayer, NeuraTrainableLayer}; use crate::{ - algebra::NeuraVectorSpace, + 
algebra::{NeuraMatrix, NeuraVector, NeuraVectorSpace}, derivable::NeuraDerivable, - utils::{multiply_matrix_transpose_vector, multiply_matrix_vector, reverse_dot_product}, }; use rand::Rng; @@ -15,8 +14,8 @@ pub struct NeuraDenseLayer< const INPUT_LEN: usize, const OUTPUT_LEN: usize, > { - weights: [[f64; INPUT_LEN]; OUTPUT_LEN], - bias: [f64; OUTPUT_LEN], + weights: NeuraMatrix, + bias: NeuraVector, activation: Act, regularization: Reg, } @@ -29,8 +28,8 @@ impl< > NeuraDenseLayer { pub fn new( - weights: [[f64; INPUT_LEN]; OUTPUT_LEN], - bias: [f64; OUTPUT_LEN], + weights: NeuraMatrix, + bias: NeuraVector, activation: Act, regularization: Reg, ) -> Self { @@ -43,7 +42,7 @@ impl< } pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self { - let mut weights = [[0.0; INPUT_LEN]; OUTPUT_LEN]; + let mut weights: NeuraMatrix = NeuraMatrix::from_value(0.0f64); // Use Xavier (or He) initialisation, using the harmonic mean // Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html @@ -63,7 +62,7 @@ impl< Self { weights, // Biases are initialized based on the activation's hint - bias: [activation.bias_hint(); OUTPUT_LEN], + bias: NeuraVector::from_value(activation.bias_hint()), activation, regularization, } @@ -77,12 +76,12 @@ impl< const OUTPUT_LEN: usize, > NeuraLayer for NeuraDenseLayer { - type Input = [f64; INPUT_LEN]; + type Input = NeuraVector; - type Output = [f64; OUTPUT_LEN]; + type Output = NeuraVector; fn eval(&self, input: &Self::Input) -> Self::Output { - let mut result = multiply_matrix_vector(&self.weights, input); + let mut result = self.weights.multiply_vector(input); for i in 0..OUTPUT_LEN { result[i] = self.activation.eval(result[i] + self.bias[i]); @@ -99,30 +98,33 @@ impl< const OUTPUT_LEN: usize, > NeuraTrainableLayer for NeuraDenseLayer { - type Delta = ([[f64; INPUT_LEN]; OUTPUT_LEN], [f64; OUTPUT_LEN]); + type Delta = ( + NeuraMatrix, + NeuraVector, + ); fn backpropagate( &self, input: &Self::Input, epsilon: Self::Output, ) -> (Self::Input, Self::Delta) { - let evaluated = multiply_matrix_vector(&self.weights, input); + let evaluated = self.weights.multiply_vector(input); // Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron), // with `self.activation'(input) ° epsilon = delta` - let mut delta = epsilon.clone(); + let mut delta: NeuraVector = epsilon.clone(); for i in 0..OUTPUT_LEN { delta[i] *= self.activation.derivate(evaluated[i]); } // Compute the weight gradient - let weights_gradient = reverse_dot_product(&delta, input); + let weights_gradient = delta.reverse_dot(input); + + let new_epsilon = self.weights.transpose_multiply_vector(&delta); // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation // The gradient of the bias is equal to the delta term of the backpropagation algorithm let bias_gradient = delta; - let new_epsilon = multiply_matrix_transpose_vector(&self.weights, &delta); - (new_epsilon, (weights_gradient, bias_gradient)) } @@ -132,7 +134,7 @@ impl< } fn regularize(&self) -> Self::Delta { - let mut res = ([[0.0; INPUT_LEN]; OUTPUT_LEN], [0.0; OUTPUT_LEN]); + let mut res = Self::Delta::default(); for i in 0..OUTPUT_LEN { for j in 0..INPUT_LEN { @@ -149,7 +151,10 @@ impl< #[cfg(test)] mod test { use super::*; - use crate::derivable::{activation::Relu, regularize::NeuraL0}; + use crate::{ + derivable::{activation::Relu, regularize::NeuraL0}, + utils::uniform_vector, + }; #[test] fn test_from_rng() { @@ -160,6 +165,16 @@ 
mod test { for x in 0..64 { input[x] = rng.gen(); } - assert!(layer.eval(&input).len() == 32); + assert!(layer.eval(&input.into()).len() == 32); + } + + #[test] + fn test_stack_overflow_big_layer() { + let layer = NeuraDenseLayer::from_rng(&mut rand::thread_rng(), Relu, NeuraL0) + as NeuraDenseLayer; + + layer.backpropagate(&uniform_vector(), uniform_vector()); + + as NeuraTrainableLayer>::Delta::zero(); } } diff --git a/src/layer/dropout.rs b/src/layer/dropout.rs index 442afa7..1111e66 100644 --- a/src/layer/dropout.rs +++ b/src/layer/dropout.rs @@ -1,12 +1,14 @@ use rand::Rng; +use crate::algebra::NeuraVector; + use super::{NeuraLayer, NeuraTrainableLayer}; #[derive(Clone, Debug)] pub struct NeuraDropoutLayer { pub dropout_probability: f64, multiplier: f64, - mask: [bool; LENGTH], + mask: NeuraVector, rng: R, } @@ -15,12 +17,12 @@ impl NeuraDropoutLayer { Self { dropout_probability, multiplier: 1.0, - mask: [false; LENGTH], + mask: NeuraVector::from_value(false), rng, } } - fn apply_dropout(&self, vector: &mut [f64; LENGTH]) { + fn apply_dropout(&self, vector: &mut NeuraVector) { for (index, &dropout) in self.mask.iter().enumerate() { if dropout { vector[index] = 0.0; @@ -32,8 +34,8 @@ impl NeuraDropoutLayer { } impl NeuraLayer for NeuraDropoutLayer { - type Input = [f64; LENGTH]; - type Output = [f64; LENGTH]; + type Input = NeuraVector; + type Output = NeuraVector; fn eval(&self, input: &Self::Input) -> Self::Output { let mut result = input.clone(); @@ -83,7 +85,7 @@ impl NeuraTrainableLayer for NeuraDropoutLayer { $crate::layer::NeuraLockLayer($layer) }; + + // ( "flatten" ) => { + // $crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<_, _, f64> + // }; + + // ( "flatten", $width:expr, $height:expr ) => { + // $crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<$width, $height, f64> + // }; + + // ( "reshape", $height:expr ) => { + // $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<_, $height, f64> + // }; + + // ( "reshape", $width:expr, $height:expr ) => { + // $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<$width, $height, f64> + // }; } diff --git a/src/layer/one_hot.rs b/src/layer/one_hot.rs index 029f9a9..22c094f 100644 --- a/src/layer/one_hot.rs +++ b/src/layer/one_hot.rs @@ -1,34 +1,32 @@ +use crate::algebra::{NeuraMatrix, NeuraVector}; + use super::{NeuraLayer, NeuraTrainableLayer}; /// A special layer that allows you to split a vector into one-hot vectors #[derive(Debug, Clone, PartialEq)] pub struct NeuraOneHotLayer; -impl NeuraLayer for NeuraOneHotLayer -where - [(); LENGTH * CATS]: Sized, -{ - type Input = [f64; LENGTH]; - type Output = [f64; LENGTH * CATS]; +impl NeuraLayer for NeuraOneHotLayer { + type Input = NeuraVector; + type Output = NeuraMatrix; fn eval(&self, input: &Self::Input) -> Self::Output { - let mut res = [0.0; LENGTH * CATS]; + let mut res = NeuraMatrix::default(); for i in 0..LENGTH { let cat_low = input[i].floor().max(0.0).min(CATS as f64 - 2.0); let amount = (input[i] - cat_low).max(0.0).min(1.0); let cat_low = cat_low as usize; - res[i * LENGTH + cat_low] = 1.0 - amount; - res[i * LENGTH + cat_low + 1] = amount; + res[i][cat_low] = 1.0 - amount; + res[i][cat_low + 1] = amount; } res } } -impl NeuraTrainableLayer for NeuraOneHotLayer -where - [(); LENGTH * CATS]: Sized, +impl NeuraTrainableLayer + for NeuraOneHotLayer { type Delta = (); @@ -37,11 +35,11 @@ where input: &Self::Input, epsilon: Self::Output, ) -> (Self::Input, Self::Delta) { - let 
mut res = [0.0; LENGTH]; + let mut res = NeuraVector::default(); for i in 0..LENGTH { let cat_low = input[i].floor().max(0.0).min(CATS as f64 - 2.0) as usize; - let epsilon = -epsilon[i * LENGTH + cat_low] + epsilon[i * LENGTH + cat_low + 1]; + let epsilon = -epsilon[i][cat_low] + epsilon[i][cat_low + 1]; // Scale epsilon by how many entries were ignored res[i] = epsilon * CATS as f64 / 2.0; } diff --git a/src/layer/reshape.rs b/src/layer/reshape.rs new file mode 100644 index 0000000..96979ab --- /dev/null +++ b/src/layer/reshape.rs @@ -0,0 +1,147 @@ +//! This module is currently disabled, as it relies on `generic_const_exprs`, which is too unstable to use as of now + +use super::{NeuraLayer, NeuraTrainableLayer}; + +/// Converts a `[[T; WIDTH]; HEIGHT]` into a `[T; WIDTH * HEIGHT]`. +/// Requires the `#![feature(generic_const_exprs)]` feature to be enabled. +pub struct NeuraFlattenLayer { + phantom: std::marker::PhantomData, +} + +/// Converts a `[T; WIDTH * HEIGHT]` into a `[[T; WIDTH]; HEIGHT]`. +/// Requires the `#![feature(generic_const_exprs)]` feature to be enabled. +pub struct NeuraReshapeLayer { + phantom: std::marker::PhantomData, +} + +#[inline(always)] +fn flatten( + input: &[[T; WIDTH]; HEIGHT], +) -> [T; WIDTH * HEIGHT] +where + [T; WIDTH * HEIGHT]: Sized, +{ + let mut res = [T::default(); WIDTH * HEIGHT]; + + // Hopefully the optimizer realizes this can be all optimized away + for i in 0..HEIGHT { + for j in 0..WIDTH { + res[i * WIDTH + j] = input[i][j]; + } + } + + res +} + +#[inline(always)] +fn reshape( + input: &[T; WIDTH * HEIGHT], +) -> [[T; WIDTH]; HEIGHT] +where + [T; WIDTH * HEIGHT]: Sized, +{ + let mut res = [[T::default(); WIDTH]; HEIGHT]; + + // Hopefully the optimizer realizes this can be all optimized away + for i in 0..HEIGHT { + for j in 0..WIDTH { + res[i][j] = input[i * WIDTH + j]; + } + } + + res +} + +impl NeuraFlattenLayer { + pub fn new() -> Self { + Self { + phantom: std::marker::PhantomData, + } + } +} + +impl NeuraReshapeLayer { + pub fn new() -> Self { + Self { + phantom: std::marker::PhantomData, + } + } +} + +impl NeuraLayer + for NeuraFlattenLayer +where + [T; WIDTH * HEIGHT]: Sized, +{ + type Input = [[T; WIDTH]; HEIGHT]; + + type Output = [T; WIDTH * HEIGHT]; + + #[inline(always)] + fn eval(&self, input: &Self::Input) -> Self::Output { + flatten(input) + } +} + +impl NeuraLayer + for NeuraReshapeLayer +where + [T; WIDTH * HEIGHT]: Sized, +{ + type Input = [T; WIDTH * HEIGHT]; + + type Output = [[T; WIDTH]; HEIGHT]; + + #[inline(always)] + fn eval(&self, input: &Self::Input) -> Self::Output { + reshape(input) + } +} + +impl NeuraTrainableLayer + for NeuraFlattenLayer +where + [T; WIDTH * HEIGHT]: Sized, +{ + type Delta = (); + + fn backpropagate( + &self, + _input: &Self::Input, + epsilon: Self::Output, + ) -> (Self::Input, Self::Delta) { + (reshape(&epsilon), ()) + } + + fn regularize(&self) -> Self::Delta { + todo!() + } + + fn apply_gradient(&mut self, _gradient: &Self::Delta) { + // Noop + } +} + +impl NeuraTrainableLayer + for NeuraReshapeLayer +where + [T; WIDTH * HEIGHT]: Sized, +{ + type Delta = (); + + fn backpropagate( + &self, + _input: &Self::Input, + epsilon: Self::Output, + ) -> (Self::Input, Self::Delta) { + (flatten(&epsilon), ()) + } + + fn regularize(&self) -> Self::Delta { + todo!() + } + + fn apply_gradient(&mut self, _gradient: &Self::Delta) { + // Noop + } +} diff --git a/src/layer/softmax.rs b/src/layer/softmax.rs index ebbc22b..ab27a69 100644 --- a/src/layer/softmax.rs +++ b/src/layer/softmax.rs @@ -1,4 +1,4 @@ -use 
crate::utils::multiply_vectors_pointwise; +use crate::algebra::NeuraVector; use super::{NeuraLayer, NeuraTrainableLayer}; @@ -13,16 +13,16 @@ impl NeuraSoftmaxLayer { } impl NeuraLayer for NeuraSoftmaxLayer { - type Input = [f64; LENGTH]; - type Output = [f64; LENGTH]; + type Input = NeuraVector; + type Output = NeuraVector; fn eval(&self, input: &Self::Input) -> Self::Output { - let mut res = input.clone(); + let mut res: Self::Input = input.clone(); let mut max = 0.0; - for item in &res { - if *item > max { - max = *item; + for &item in &res { + if item > max { + max = item; } } @@ -55,10 +55,10 @@ impl NeuraTrainableLayer for NeuraSoftmaxLayer { let evaluated = self.eval(input); // Compute $a_{l-1,i} \epsilon_{l,i}$ - epsilon = multiply_vectors_pointwise(&epsilon, &evaluated); + epsilon = epsilon.hadamard_product(&evaluated); // Compute $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$ - let sum_diagonal_terms: f64 = epsilon.iter().copied().sum(); + let sum_diagonal_terms: f64 = epsilon.iter().sum(); for i in 0..LENGTH { // Multiply $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$ by $a_{l-1,i}$ and add it to $a_{l-1,i} \epsilon_{l,i}$ @@ -79,10 +79,8 @@ impl NeuraTrainableLayer for NeuraSoftmaxLayer { #[cfg(test)] mod test { - use crate::algebra::NeuraVectorSpace; - use crate::utils::{ - matrix_from_diagonal, multiply_matrix_vector, reverse_dot_product, uniform_vector, - }; + use crate::algebra::{NeuraMatrix, NeuraVectorSpace}; + use crate::utils::uniform_vector; use super::*; @@ -91,7 +89,7 @@ mod test { const EPSILON: f64 = 0.000002; let layer = NeuraSoftmaxLayer::new() as NeuraSoftmaxLayer<3>; - let result = layer.eval(&[1.0, 2.0, 8.0]); + let result = layer.eval(&[1.0, 2.0, 8.0].into()); assert!((result[0] - 0.0009088).abs() < EPSILON); assert!((result[1] - 0.0024704).abs() < EPSILON); @@ -113,7 +111,7 @@ mod test { for epsilon2 in [2.9, 3.1, 3.7] { let epsilon = [epsilon1, epsilon2]; - let (epsilon, _) = layer.backpropagate(&input, epsilon); + let (epsilon, _) = layer.backpropagate(&input.into(), epsilon.into()); let expected = [ output[0] * (1.0 - output[0]) * epsilon1 - output[1] * output[0] * epsilon2, @@ -136,15 +134,15 @@ mod test { let layer = NeuraSoftmaxLayer::new() as NeuraSoftmaxLayer<4>; for _ in 0..100 { - let input: [f64; 4] = uniform_vector(); + let input = uniform_vector::<4>(); let evaluated = layer.eval(&input); - let loss: [f64; 4] = uniform_vector(); + let loss = uniform_vector::<4>(); - let mut derivative = reverse_dot_product(&evaluated, &evaluated); + let mut derivative = evaluated.reverse_dot(&evaluated); derivative.mul_assign(-1.0); - derivative.add_assign(&matrix_from_diagonal(&evaluated)); + derivative.add_assign(&NeuraMatrix::from_diagonal(&evaluated)); - let expected = multiply_matrix_vector(&derivative, &loss); + let expected = derivative.multiply_vector(&loss); let (actual, _) = layer.backpropagate(&input, loss); for i in 0..4 { diff --git a/src/lib.rs b/src/lib.rs index 61ee52a..493e989 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ #![feature(generic_arg_infer)] #![feature(generic_associated_types)] -#![feature(generic_const_exprs)] +// #![feature(generic_const_exprs)] pub mod algebra; pub mod derivable; @@ -11,7 +11,7 @@ pub mod train; mod utils; // TODO: move to a different file -pub use utils::{argmax, one_hot}; +pub use utils::{argmax, cycle_shuffling, one_hot}; pub mod prelude { // Macros @@ -21,5 +21,4 @@ pub mod prelude { pub use crate::layer::{NeuraDenseLayer, NeuraDropoutLayer, NeuraLayer}; pub use crate::network::sequential::{NeuraSequential, 
NeuraSequentialTail}; pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer}; - pub use crate::utils::cycle_shuffling; } diff --git a/src/network/sequential.rs b/src/network/sequential.rs index 370a75c..602e50d 100644 --- a/src/network/sequential.rs +++ b/src/network/sequential.rs @@ -8,7 +8,7 @@ use super::NeuraTrainableNetwork; #[derive(Clone, Debug)] pub struct NeuraSequential { pub layer: Layer, - pub child_network: ChildNetwork, + pub child_network: Box, } /// Operations on the tail end of a sequential network @@ -24,7 +24,7 @@ impl NeuraSequential { pub fn new(layer: Layer, child_network: ChildNetwork) -> Self { Self { layer, - child_network, + child_network: Box::new(child_network), } } @@ -36,13 +36,13 @@ impl NeuraSequential { } pub fn trim_front(self) -> ChildNetwork { - self.child_network + *self.child_network } pub fn push_front(self, layer: T) -> NeuraSequential { NeuraSequential { layer: layer, - child_network: self, + child_network: Box::new(self), } } } @@ -59,10 +59,10 @@ impl NeuraSequentialTail for NeuraSequential { fn push_tail(self, layer: T) -> Self::TailPushed { NeuraSequential { layer: self.layer, - child_network: NeuraSequential { + child_network: Box::new(NeuraSequential { layer, - child_network: (), - }, + child_network: Box::new(()), + }), } } } @@ -78,14 +78,14 @@ impl NeuraSequentialTail fn trim_tail(self) -> Self::TailTrimmed { NeuraSequential { layer: self.layer, - child_network: self.child_network.trim_tail(), + child_network: Box::new(self.child_network.trim_tail()), } } fn push_tail(self, layer: T) -> Self::TailPushed { NeuraSequential { layer: self.layer, - child_network: self.child_network.push_tail(layer), + child_network: Box::new(self.child_network.push_tail(layer)), } } } @@ -145,7 +145,7 @@ impl NeuraTrainableNetwork for NeuraSequential> NeuraTrainableNetwork for NeuraSequential { - type Delta = (Layer::Delta, ChildNetwork::Delta); + type Delta = (Layer::Delta, Box); fn apply_gradient(&mut self, gradient: &Self::Delta) { self.layer.apply_gradient(&gradient.0); @@ -165,11 +165,17 @@ impl Self::Delta { - (self.layer.regularize(), self.child_network.regularize()) + ( + self.layer.regularize(), + Box::new(self.child_network.regularize()), + ) } fn prepare_epoch(&mut self) { @@ -187,7 +193,7 @@ impl From for NeuraSequential { fn from(layer: Layer) -> Self { Self { layer, - child_network: (), + child_network: Box::new(()), } } } diff --git a/src/train.rs b/src/train.rs index 875ac0a..714dd4f 100644 --- a/src/train.rs +++ b/src/train.rs @@ -1,5 +1,5 @@ use crate::{ - algebra::NeuraVectorSpace, + algebra::{NeuraVector, NeuraVectorSpace}, derivable::NeuraLoss, layer::NeuraLayer, network::{sequential::NeuraSequential, NeuraTrainableNetwork}, @@ -38,8 +38,8 @@ impl NeuraBackprop { } } -impl + Clone> - NeuraGradientSolver<[f64; N], Loss::Target> for NeuraBackprop +impl> + Clone> + NeuraGradientSolver, Loss::Target> for NeuraBackprop { fn get_gradient( &self, @@ -49,7 +49,7 @@ impl + Clone> ) -> as NeuraTrainableNetwork>::Delta where NeuraSequential: - NeuraTrainableNetwork, + NeuraTrainableNetwork>, { trainable.backpropagate(input, target, self.loss.clone()).1 } @@ -62,7 +62,7 @@ impl + Clone> ) -> f64 where NeuraSequential: - NeuraTrainableNetwork, + NeuraTrainableNetwork>, { let output = trainable.eval(&input); self.loss.eval(target, &output) @@ -146,15 +146,17 @@ impl NeuraBatchedTrainer { // Contains `momentum_factor * factor * gradient_sum_previous_iter` let mut previous_gradient_sum = - as NeuraTrainableNetwork>::Delta::zero(); + Box::< as 
NeuraTrainableNetwork>::Delta>::zero(); 'd: for iteration in 0..self.iterations { - let mut gradient_sum = - as NeuraTrainableNetwork>::Delta::zero(); + let mut gradient_sum = Box::< + as NeuraTrainableNetwork>::Delta, + >::zero(); network.prepare_epoch(); for _ in 0..self.batch_size { if let Some((input, target)) = iter.next() { - let gradient = gradient_solver.get_gradient(&network, &input, &target); + let gradient = + Box::new(gradient_solver.get_gradient(&network, &input, &target)); gradient_sum.add_assign(&gradient); } else { break 'd; @@ -164,7 +166,7 @@ impl NeuraBatchedTrainer { gradient_sum.mul_assign(factor); // Add regularization gradient - let mut reg_gradient = network.regularize(); + let mut reg_gradient = Box::new(network.regularize()); reg_gradient.mul_assign(reg_factor); gradient_sum.add_assign(®_gradient); @@ -207,12 +209,15 @@ mod test { for wa in [0.0, 0.25, 0.5, 1.0] { for wb in [0.0, 0.25, 0.5, 1.0] { let network = NeuraSequential::new( - NeuraDenseLayer::new([[wa, wb]], [0.0], Linear, NeuraL0), + NeuraDenseLayer::new([[wa, wb]].into(), [0.0].into(), Linear, NeuraL0), (), ); - let gradient = - NeuraBackprop::new(Euclidean).get_gradient(&network, &[1.0, 1.0], &[0.0]); + let gradient = NeuraBackprop::new(Euclidean).get_gradient( + &network, + &[1.0, 1.0].into(), + &[0.0].into(), + ); let expected = wa + wb; assert!((gradient.0[0][0] - expected) < 0.001); @@ -226,24 +231,33 @@ mod test { const EPSILON: f64 = 0.00001; // Test that we get the same values as https://hmkcode.com/ai/backpropagation-step-by-step/ let network = neura_sequential![ - NeuraDenseLayer::new([[0.11, 0.21], [0.12, 0.08]], [0.0; 2], Linear, NeuraL0), - NeuraDenseLayer::new([[0.14, 0.15]], [0.0], Linear, NeuraL0) + NeuraDenseLayer::new( + [[0.11, 0.21], [0.12, 0.08]].into(), + [0.0; 2].into(), + Linear, + NeuraL0 + ), + NeuraDenseLayer::new([[0.14, 0.15]].into(), [0.0].into(), Linear, NeuraL0) ]; let input = [2.0, 3.0]; let target = [1.0]; - let intermediary = network.clone().trim_tail().eval(&input); + let intermediary = network.clone().trim_tail().eval(&input.into()); assert_approx!(0.85, intermediary[0], EPSILON); assert_approx!(0.48, intermediary[1], EPSILON); - assert_approx!(0.191, network.eval(&input)[0], EPSILON); + assert_approx!(0.191, network.eval(&input.into())[0], EPSILON); - assert_approx!(0.327, Euclidean.eval(&target, &network.eval(&input)), 0.001); + assert_approx!( + 0.327, + Euclidean.eval(&target.into(), &network.eval(&input.into())), + 0.001 + ); - let delta = network.eval(&input)[0] - target[0]; + let delta = network.eval(&input.into())[0] - target[0]; let (gradient_first, gradient_second) = - NeuraBackprop::new(Euclidean).get_gradient(&network, &input, &target); + NeuraBackprop::new(Euclidean).get_gradient(&network, &input.into(), &target.into()); let gradient_first = gradient_first.0; let gradient_second = gradient_second.0[0]; diff --git a/src/utils.rs b/src/utils.rs index 7367324..8fc3122 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,79 +1,4 @@ -pub(crate) fn multiply_matrix_vector( - matrix: &[[f64; WIDTH]; HEIGHT], - vector: &[f64; WIDTH], -) -> [f64; HEIGHT] { - let mut result = [0.0; HEIGHT]; - - for i in 0..HEIGHT { - let mut sum = 0.0; - for k in 0..WIDTH { - sum += matrix[i][k] * vector[k]; - } - result[i] = sum; - } - - result -} - -/// Equivalent to `multiply_matrix_vector(transpose(matrix), vector)`. 
-pub(crate) fn multiply_matrix_transpose_vector( - matrix: &[[f64; WIDTH]; HEIGHT], - vector: &[f64; HEIGHT], -) -> [f64; WIDTH] { - let mut result = [0.0; WIDTH]; - - for i in 0..WIDTH { - let mut sum = 0.0; - for k in 0..HEIGHT { - sum += matrix[k][i] * vector[k]; - } - result[i] = sum; - } - - result -} - -// Returns $left^{\top} \cdot right$, ie. $\ket{left} \bra{right}$ -pub(crate) fn reverse_dot_product( - left: &[f64; HEIGHT], - right: &[f64; WIDTH], -) -> [[f64; WIDTH]; HEIGHT] { - let mut result = [[0.0; WIDTH]; HEIGHT]; - - for i in 0..HEIGHT { - for j in 0..WIDTH { - result[i][j] = left[i] * right[j]; - } - } - - result -} - -pub(crate) fn multiply_vectors_pointwise( - left: &[f64; LENGTH], - right: &[f64; LENGTH], -) -> [f64; LENGTH] { - let mut result = [0.0; LENGTH]; - - for i in 0..LENGTH { - result[i] = left[i] * right[i]; - } - - result -} - -#[cfg(test)] -pub(crate) fn matrix_from_diagonal( - vector: &[f64; LENGTH], -) -> [[f64; LENGTH]; LENGTH] { - let mut result = [[0.0; LENGTH]; LENGTH]; - - for i in 0..LENGTH { - result[i][i] = vector[i]; - } - - result -} +use crate::algebra::NeuraVector; #[allow(dead_code)] pub(crate) fn assign_add_vector(sum: &mut [f64; N], operand: &[f64; N]) { @@ -164,9 +89,10 @@ where } #[cfg(test)] -pub(crate) fn uniform_vector() -> [f64; LENGTH] { +pub(crate) fn uniform_vector() -> NeuraVector { use rand::Rng; - let mut res = [0.0; LENGTH]; + + let mut res: NeuraVector = NeuraVector::default(); let mut rng = rand::thread_rng(); for i in 0..LENGTH { @@ -176,8 +102,8 @@ pub(crate) fn uniform_vector() -> [f64; LENGTH] { res } -pub fn one_hot(value: usize) -> [f64; N] { - let mut res = [0.0; N]; +pub fn one_hot(value: usize) -> NeuraVector { + let mut res = NeuraVector::default(); if value < N { res[value] = 1.0; } @@ -196,25 +122,6 @@ pub fn argmax(array: &[f64]) -> usize { res } -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_reverse_dot_product() { - let left = [2.0, 3.0, 5.0]; - let right = [7.0, 11.0, 13.0, 17.0]; - - let expected = [ - [14.0, 22.0, 26.0, 34.0], - [21.0, 33.0, 39.0, 51.0], - [35.0, 55.0, 65.0, 85.0], - ]; - - assert_eq!(expected, reverse_dot_product(&left, &right)); - } -} - #[cfg(test)] #[macro_export] macro_rules! assert_approx {