♻️ Implement and transition to NeuraMatrix and NeuraVector, to prevent stack overflows

main
Shad Amethyst 2 years ago
parent 920bca4a48
commit 6c1d6874d7
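
The core of the change: layer weights, biases and activations were previously plain `[[f64; WIDTH]; HEIGHT]` / `[f64; LENGTH]` arrays, which are stored inline and overflow the stack for large layers (a single 1000×1000 `f64` matrix is already 8 MB). The new `NeuraMatrix` and `NeuraVector` keep the const-generic dimensions but move the storage behind a `Box`, so the buffer lives on the heap. A minimal sketch of the idea, not the crate's actual code:

```rust
// Sketch: keep the dimensions as const generics, put the storage behind a Box.
struct HeapMatrix<const W: usize, const H: usize> {
    data: Box<[[f64; W]; H]>,
}

impl<const W: usize, const H: usize> HeapMatrix<W, H> {
    fn zeros() -> Self {
        // Note: `Box::new([[0.0; W]; H])` would still build the array on the stack
        // before moving it into the Box, which is presumably why this commit adds the
        // `boxed-array` dependency and fills the buffer with `from_cloned` instead.
        let data: Box<[[f64; W]; H]> = vec![[0.0f64; W]; H]
            .into_boxed_slice()
            .try_into()
            .expect("the Vec has exactly H rows");
        Self { data }
    }
}

fn main() {
    // 8 MB of zeros, allocated on the heap; the struct itself is pointer-sized.
    let m = HeapMatrix::<1000, 1000>::zeros();
    assert_eq!(m.data[999][999], 0.0);
}
```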

.gitignore

@ -1,2 +1,3 @@
/target /target
/Cargo.lock /Cargo.lock
/data

@ -6,7 +6,9 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
boxed-array = "0.1.0"
ndarray = "^0.15" ndarray = "^0.15"
num = "^0.4"
# num-traits = "0.2.15" # num-traits = "0.2.15"
rand = "^0.8" rand = "^0.8"
rand_distr = "0.4.3" rand_distr = "0.4.3"
@ -14,3 +16,4 @@ rand_distr = "0.4.3"
[dev-dependencies] [dev-dependencies]
image = "0.24.6" image = "0.24.6"
viuer = "0.6.2" viuer = "0.6.2"
rust-mnist = "0.2.0"

@ -31,7 +31,7 @@ fn main() {
(angle.cos() * radius, angle.sin() * radius) (angle.cos() * radius, angle.sin() * radius)
}; };
([x, y], neuramethyst::one_hot::<2>(category)) ([x, y].into(), neuramethyst::one_hot::<2>(category))
}); });
let test_inputs: Vec<_> = inputs.clone().take(10).collect(); let test_inputs: Vec<_> = inputs.clone().take(10).collect();
@ -49,7 +49,10 @@ fn main() {
); );
let network = network.clone(); let network = network.clone();
draw_neuron_activation(|input| network.eval(&input).into_iter().collect(), 6.0); draw_neuron_activation(
|input| network.eval(&input.into()).into_iter().collect(),
6.0,
);
println!("{}", epoch); println!("{}", epoch);
std::thread::sleep(std::time::Duration::new(0, 50_000_000)); std::thread::sleep(std::time::Duration::new(0, 50_000_000));
@ -72,7 +75,7 @@ fn main() {
let mut file = std::fs::File::create("target/bivariate.csv").unwrap(); let mut file = std::fs::File::create("target/bivariate.csv").unwrap();
for (input, _target) in test_inputs { for (input, _target) in test_inputs {
let guess = neuramethyst::argmax(&network.eval(&input)); let guess = neuramethyst::argmax(network.eval(&input).as_ref());
writeln!(&mut file, "{},{},{}", input[0], input[1], guess).unwrap(); writeln!(&mut file, "{},{},{}", input[0], input[1], guess).unwrap();
} }
} }

@ -0,0 +1,77 @@
#![feature(generic_arg_infer)]
// #![feature(generic_const_exprs)]
use neuramethyst::algebra::NeuraVector;
use rust_mnist::Mnist;
use neuramethyst::derivable::activation::{Linear, Relu};
use neuramethyst::derivable::loss::CrossEntropy;
use neuramethyst::{cycle_shuffling, one_hot, prelude::*};
fn main() {
const TRAIN_SIZE: usize = 100;
let Mnist {
train_data: train_images,
train_labels,
test_data: test_images,
test_labels,
..
} = Mnist::new("data/");
let train_images = train_images
.into_iter()
.map(|raw| {
raw.into_iter()
.map(|x| x as f64 / 255.0)
.collect::<NeuraVector<{ 28 * 28 }, f64>>()
})
.take(TRAIN_SIZE);
let train_labels = train_labels
.into_iter()
.map(|x| one_hot::<10>(x as usize))
.take(TRAIN_SIZE);
let test_images = test_images
.into_iter()
.map(|raw| {
raw.into_iter()
.map(|x| x as f64 / 255.0)
.collect::<NeuraVector<{ 28 * 28 }, f64>>()
})
.take(TRAIN_SIZE / 6);
let test_labels = test_labels
.into_iter()
.map(|x| one_hot::<10>(x as usize))
.take(TRAIN_SIZE / 6);
let train_iter = cycle_shuffling(
train_images.zip(train_labels.into_iter()),
rand::thread_rng(),
);
let test_inputs: Vec<_> = test_images.zip(test_labels.into_iter()).collect();
let mut network = neura_sequential![
neura_layer!("dense", { 28 * 28 }, 200; Relu),
neura_layer!("dropout", 0.5),
neura_layer!("dense", 100; Relu),
neura_layer!("dropout", 0.5),
neura_layer!("dense", 30; Relu),
neura_layer!("dropout", 0.5),
neura_layer!("dense", 10; Linear),
neura_layer!("softmax")
];
let mut trainer = NeuraBatchedTrainer::new(0.03, TRAIN_SIZE * 10);
trainer.log_iterations = (TRAIN_SIZE / 128).max(1);
trainer.batch_size = 128;
trainer.learning_momentum = 0.001;
trainer.train(
NeuraBackprop::new(CrossEntropy),
&mut network,
train_iter,
&test_inputs,
);
}

@ -1,8 +1,9 @@
#![feature(generic_arg_infer)] #![feature(generic_arg_infer)]
use neuramethyst::algebra::NeuraVector;
use neuramethyst::derivable::activation::Relu; use neuramethyst::derivable::activation::Relu;
use neuramethyst::derivable::loss::Euclidean; use neuramethyst::derivable::loss::Euclidean;
use neuramethyst::prelude::*; use neuramethyst::{cycle_shuffling, prelude::*};
fn main() { fn main() {
let mut network = neura_sequential![ let mut network = neura_sequential![
@ -11,14 +12,14 @@ fn main() {
neura_layer!("dense", 1; Relu) neura_layer!("dense", 1; Relu)
]; ];
let inputs = [ let inputs: [(NeuraVector<2, f64>, NeuraVector<1, f64>); 4] = [
([0.0, 0.0], [0.0]), ([0.0, 0.0].into(), [0.0].into()),
([0.0, 1.0], [1.0]), ([0.0, 1.0].into(), [1.0].into()),
([1.0, 0.0], [1.0]), ([1.0, 0.0].into(), [1.0].into()),
([1.0, 1.0], [0.0]), ([1.0, 1.0].into(), [0.0].into()),
]; ];
for (input, target) in inputs { for (input, target) in &inputs {
println!( println!(
"Input: {:?}, target: {}, actual: {:.3}", "Input: {:?}, target: {}, actual: {:.3}",
&input, &input,

@ -0,0 +1,292 @@
use std::borrow::Borrow;
use super::*;
use boxed_array::from_cloned;
use num::Float;
/// A simple abstraction around `[[F; WIDTH]; HEIGHT]`,
/// which ensures that all allocations that depend on `WIDTH` or `HEIGHT` are done on the heap,
/// without losing the length information.
#[derive(Clone, Debug, PartialEq)]
pub struct NeuraMatrix<const WIDTH: usize, const HEIGHT: usize, F> {
pub data: Box<[[F; WIDTH]; HEIGHT]>,
}
impl<const WIDTH: usize, const HEIGHT: usize, F> NeuraMatrix<WIDTH, HEIGHT, F> {
#[inline(always)]
pub fn from_value(value: F) -> Self
where
F: Clone,
{
Self {
data: from_cloned(&value),
}
}
#[inline(always)]
pub fn get(&self, x: usize, y: usize) -> Option<&F> {
if x >= WIDTH || y >= HEIGHT {
return None;
}
Some(&self.data[y][x])
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F: Float> NeuraMatrix<WIDTH, HEIGHT, F> {
/// Returns `self * vector`
pub fn multiply_vector(&self, vector: impl Borrow<[F; WIDTH]>) -> NeuraVector<HEIGHT, F> {
let mut result: NeuraVector<HEIGHT, F> = NeuraVector::from_value(F::zero());
let vector = vector.borrow();
for i in 0..HEIGHT {
let mut sum = F::zero();
for k in 0..WIDTH {
sum = sum + self.data[i][k] * vector[k];
}
result[i] = sum;
}
result
}
/// Returns `transpose(self) * vector`,
/// without actually performing the transpose operation
pub fn transpose_multiply_vector(
&self,
vector: impl AsRef<[F; HEIGHT]>,
) -> NeuraVector<WIDTH, F> {
let mut result: NeuraVector<WIDTH, F> = NeuraVector::from_value(F::zero());
let vector = vector.as_ref();
for j in 0..WIDTH {
let mut sum = F::zero();
for k in 0..HEIGHT {
sum = sum + self.data[k][j] * vector[k];
}
result[j] = sum;
}
result
}
}
impl<const LENGTH: usize, F: Default + Clone> NeuraMatrix<LENGTH, LENGTH, F> {
pub fn from_diagonal(vector: impl AsRef<[F; LENGTH]>) -> Self {
let mut result: NeuraMatrix<LENGTH, LENGTH, F> = NeuraMatrix::default();
let vector = vector.as_ref();
for i in 0..LENGTH {
result[i][i] = vector[i].clone();
}
result
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F: Float + From<f64> + Into<f64>> NeuraVectorSpace
for NeuraMatrix<WIDTH, HEIGHT, F>
{
fn add_assign(&mut self, other: &Self) {
for i in 0..HEIGHT {
for j in 0..WIDTH {
self.data[i][j] = self.data[i][j] + other.data[i][j];
}
}
}
fn mul_assign(&mut self, by: f64) {
let by: F = by.into();
for i in 0..HEIGHT {
for j in 0..WIDTH {
self.data[i][j] = self.data[i][j] * by;
}
}
}
#[inline(always)]
fn zero() -> Self {
Self::from_value(F::zero())
}
fn norm_squared(&self) -> f64 {
let mut sum = F::zero();
for i in 0..HEIGHT {
for j in 0..WIDTH {
let x = self.data[i][j];
sum = sum + x * x;
}
}
sum.into()
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F> From<Box<[[F; WIDTH]; HEIGHT]>>
for NeuraMatrix<WIDTH, HEIGHT, F>
{
#[inline]
fn from(data: Box<[[F; WIDTH]; HEIGHT]>) -> Self {
Self { data }
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F> From<NeuraMatrix<WIDTH, HEIGHT, F>>
for Box<[[F; WIDTH]; HEIGHT]>
{
#[inline]
fn from(matrix: NeuraMatrix<WIDTH, HEIGHT, F>) -> Self {
matrix.data
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F: Default + Clone> From<&[[F; WIDTH]; HEIGHT]>
for NeuraMatrix<WIDTH, HEIGHT, F>
{
/// **Warning:** when using this function, make sure that the array is not allocated on the stack
/// or that `WIDTH` and `HEIGHT` are bounded.
#[inline]
fn from(data: &[[F; WIDTH]; HEIGHT]) -> Self {
let mut res = Self::default();
for i in 0..HEIGHT {
for j in 0..WIDTH {
res[i][j] = data[i][j].clone();
}
}
res
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F> From<[[F; WIDTH]; HEIGHT]>
for NeuraMatrix<WIDTH, HEIGHT, F>
{
/// **Warning:** when using this function, make sure that `WIDTH` and `HEIGHT` are bounded.
fn from(data: [[F; WIDTH]; HEIGHT]) -> Self {
Self {
data: Box::new(data),
}
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F> std::ops::Index<(usize, usize)>
for NeuraMatrix<WIDTH, HEIGHT, F>
{
type Output = F;
#[inline]
fn index(&self, index: (usize, usize)) -> &Self::Output {
if index.0 >= WIDTH || index.1 >= HEIGHT {
panic!(
"Index out of bound: tried indexing matrix element ({}, {}), which is outside of NeuraMatrix<{}, {}, _>",
index.0, index.1, WIDTH, HEIGHT
);
}
&self.data[index.1][index.0]
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F> std::ops::IndexMut<(usize, usize)>
for NeuraMatrix<WIDTH, HEIGHT, F>
{
#[inline]
fn index_mut(&mut self, index: (usize, usize)) -> &mut Self::Output {
if index.0 >= WIDTH || index.1 >= HEIGHT {
panic!(
"Index out of bound: tried indexing matrix element ({}, {}), which is outside of NeuraMatrix<{}, {}, _>",
index.0, index.1, WIDTH, HEIGHT
);
}
&mut self.data[index.1][index.0]
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F> std::ops::Index<usize>
for NeuraMatrix<WIDTH, HEIGHT, F>
{
type Output = [F; WIDTH];
#[inline(always)]
fn index(&self, index: usize) -> &Self::Output {
if index >= HEIGHT {
panic!(
"Index out of bound: tried indexing matrix row {}, which is outside of NeuraMatrix<{}, {}, _>",
index, WIDTH, HEIGHT
);
}
&self.data[index]
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F> std::ops::IndexMut<usize>
for NeuraMatrix<WIDTH, HEIGHT, F>
{
#[inline(always)]
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
if index >= HEIGHT {
panic!(
"Index out of bound: tried indexing matrix row {}, which is outside of NeuraMatrix<{}, {}, _>",
index, WIDTH, HEIGHT
);
}
&mut self.data[index]
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F> AsRef<[[F; WIDTH]; HEIGHT]>
for NeuraMatrix<WIDTH, HEIGHT, F>
{
#[inline(always)]
fn as_ref(&self) -> &[[F; WIDTH]; HEIGHT] {
&self.data
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F> Borrow<[[F; WIDTH]; HEIGHT]>
for NeuraMatrix<WIDTH, HEIGHT, F>
{
#[inline(always)]
fn borrow(&self) -> &[[F; WIDTH]; HEIGHT] {
&self.data
}
}
impl<const WIDTH: usize, const HEIGHT: usize, F: Default + Clone> Default
for NeuraMatrix<WIDTH, HEIGHT, F>
{
#[inline(always)]
fn default() -> Self {
Self::from_value(F::default())
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_index() {
let mut matrix: NeuraMatrix<1000, 1000, f64> = NeuraMatrix::from_value(0.0);
matrix[100][200] = 0.3;
assert_eq!(matrix[(200, 100)], 0.3);
matrix[(999, 999)] = 0.5;
assert_eq!(matrix[999][999], 0.5);
}
#[test]
#[should_panic(
expected = "Index out of bound: tried indexing matrix row 100, which is outside of NeuraMatrix<100, 100, _>"
)]
fn test_index_oob() {
let matrix: NeuraMatrix<100, 100, f64> = NeuraMatrix::from_value(0.0);
let _ = matrix[100];
}
}
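
A usage sketch of the matrix type defined above (values made up; the paths are the ones this commit exports from `neuramethyst::algebra`):

```rust
use neuramethyst::algebra::{NeuraMatrix, NeuraVector};

fn main() {
    // A 2-row, 3-column matrix (WIDTH = 3, HEIGHT = 2) and a length-3 vector.
    let m: NeuraMatrix<3, 2, f64> = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]].into();
    let v: NeuraVector<3, f64> = [1.0, 0.0, -1.0].into();

    // `multiply_vector` computes `self * vector`: WIDTH-sized input, HEIGHT-sized output.
    let mv: NeuraVector<2, f64> = m.multiply_vector(&v);
    assert_eq!((mv[0], mv[1]), (-2.0, -2.0));

    // `transpose_multiply_vector` computes `transpose(self) * vector` without building the transpose.
    let w: NeuraVector<2, f64> = [1.0, 1.0].into();
    let tmv: NeuraVector<3, f64> = m.transpose_multiply_vector(&w);
    assert_eq!((tmv[0], tmv[1], tmv[2]), (5.0, 7.0, 9.0));

    // Row indexing and (x, y) indexing address the same cell: m[row][column] == m[(x, y)].
    assert_eq!(m[1][2], m[(2, 1)]);
}
```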

@ -1,3 +1,9 @@
mod matrix;
pub use matrix::NeuraMatrix;
mod vector;
pub use vector::NeuraVector;
/// An extension of `std::ops::AddAssign` and `std::ops::Default` /// An extension of `std::ops::AddAssign` and `std::ops::Default`
pub trait NeuraVectorSpace { pub trait NeuraVectorSpace {
fn add_assign(&mut self, other: &Self); fn add_assign(&mut self, other: &Self);
@ -30,6 +36,24 @@ impl NeuraVectorSpace for () {
} }
} }
impl<T: NeuraVectorSpace> NeuraVectorSpace for Box<T> {
fn add_assign(&mut self, other: &Self) {
self.as_mut().add_assign(other.as_ref());
}
fn mul_assign(&mut self, by: f64) {
self.as_mut().mul_assign(by);
}
fn zero() -> Self {
Box::new(T::zero())
}
fn norm_squared(&self) -> f64 {
self.as_ref().norm_squared()
}
}
impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left, Right) { impl<Left: NeuraVectorSpace, Right: NeuraVectorSpace> NeuraVectorSpace for (Left, Right) {
fn add_assign(&mut self, other: &Self) { fn add_assign(&mut self, other: &Self) {
NeuraVectorSpace::add_assign(&mut self.0, &other.0); NeuraVectorSpace::add_assign(&mut self.0, &other.0);
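
The new `impl NeuraVectorSpace for Box<T>` above is what lets the trainer and the sequential network keep whole gradients behind a `Box` (see the `train` and `sequential` changes further down) while staying generic over the trait. A sketch of that kind of generic use, with made-up values:

```rust
use neuramethyst::algebra::{NeuraVector, NeuraVectorSpace};

// Averages a batch of gradients using only the trait's methods; this works the same for
// NeuraVector, NeuraMatrix, tuples of them, and (thanks to the impl above) Box-ed versions.
fn average<G: NeuraVectorSpace>(gradients: &[G]) -> G {
    let mut sum = G::zero();
    for gradient in gradients {
        sum.add_assign(gradient);
    }
    sum.mul_assign(1.0 / gradients.len() as f64);
    sum
}

fn main() {
    let gradients: Vec<Box<NeuraVector<3, f64>>> = vec![
        Box::new([1.0, 2.0, 3.0].into()),
        Box::new([3.0, 2.0, 1.0].into()),
    ];
    let mean = *average(&gradients);
    assert_eq!((mean[0], mean[1], mean[2]), (2.0, 2.0, 2.0));
}
```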

@ -0,0 +1,278 @@
use std::borrow::Borrow;
use super::*;
use boxed_array::from_cloned;
use num::Float;
#[derive(Clone, Debug, PartialEq)]
pub struct NeuraVector<const LENGTH: usize, F> {
pub data: Box<[F; LENGTH]>,
}
impl<const LENGTH: usize, F> NeuraVector<LENGTH, F> {
#[inline(always)]
pub fn from_value(value: F) -> Self
where
F: Clone,
{
Self {
data: from_cloned(&value),
}
}
#[inline(always)]
pub fn get(&self, index: usize) -> Option<&F> {
if index >= LENGTH {
None
} else {
Some(&self.data[index])
}
}
#[inline(always)]
pub fn len(&self) -> usize {
LENGTH
}
pub fn iter<'a>(&'a self) -> std::slice::Iter<'a, F> {
self.data.iter()
}
}
impl<const LENGTH: usize, F: Float> NeuraVector<LENGTH, F> {
pub fn dot(&self, other: impl AsRef<[F; LENGTH]>) -> F {
let mut sum = F::zero();
let other = other.as_ref();
for i in 0..LENGTH {
sum = sum + self.data[i] * other[i];
}
sum
}
/// Returns $left^{\top} \cdot right$, ie. $\ket{left} \bra{right}$
pub fn reverse_dot<const WIDTH: usize>(
&self,
other: impl Borrow<[F; WIDTH]>,
) -> NeuraMatrix<WIDTH, LENGTH, F> {
let mut result: NeuraMatrix<WIDTH, LENGTH, F> = NeuraMatrix::from_value(F::zero());
let other = other.borrow();
for i in 0..LENGTH {
for j in 0..WIDTH {
result[i][j] = self.data[i] * other[j];
}
}
result
}
pub fn hadamard_product(&self, other: impl AsRef<[F; LENGTH]>) -> NeuraVector<LENGTH, F> {
let mut result: NeuraVector<LENGTH, F> = NeuraVector::from_value(F::zero());
let other = other.as_ref();
for i in 0..LENGTH {
result[i] = self.data[i] * other[i];
}
result
}
}
impl<const LENGTH: usize, F: Float + From<f64> + Into<f64>> NeuraVectorSpace
for NeuraVector<LENGTH, F>
{
fn add_assign(&mut self, other: &Self) {
for i in 0..LENGTH {
self.data[i] = self.data[i] + other.data[i];
}
}
fn mul_assign(&mut self, by: f64) {
for i in 0..LENGTH {
self.data[i] = self.data[i] * by.into();
}
}
#[inline(always)]
fn zero() -> Self {
Self::from_value(F::zero())
}
fn norm_squared(&self) -> f64 {
let mut sum = F::zero();
for i in 0..LENGTH {
sum = sum + self.data[i] * self.data[i];
}
sum.into()
}
}
impl<const LENGTH: usize, F> std::ops::Index<usize> for NeuraVector<LENGTH, F> {
type Output = F;
#[inline(always)]
fn index(&self, index: usize) -> &Self::Output {
if index >= LENGTH {
panic!(
"Tried indexing element {} of NeuraVector<{}, _>",
index, LENGTH
);
}
&self.data[index]
}
}
impl<const LENGTH: usize, F> std::ops::IndexMut<usize> for NeuraVector<LENGTH, F> {
#[inline(always)]
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
if index >= LENGTH {
panic!(
"Tried indexing element {} of NeuraVector<{}, _>",
index, LENGTH
);
}
&mut self.data[index]
}
}
impl<const LENGTH: usize, F> AsRef<[F; LENGTH]> for NeuraVector<LENGTH, F> {
#[inline(always)]
fn as_ref(&self) -> &[F; LENGTH] {
&self.data
}
}
impl<const LENGTH: usize, F> AsRef<[F]> for NeuraVector<LENGTH, F> {
#[inline(always)]
fn as_ref(&self) -> &[F] {
self.data.as_ref()
}
}
impl<const LENGTH: usize, F> Borrow<[F; LENGTH]> for NeuraVector<LENGTH, F> {
#[inline(always)]
fn borrow(&self) -> &[F; LENGTH] {
&self.data
}
}
impl<const LENGTH: usize, F> Borrow<[F; LENGTH]> for &NeuraVector<LENGTH, F> {
#[inline(always)]
fn borrow(&self) -> &[F; LENGTH] {
&self.data
}
}
impl<const LENGTH: usize, F> From<Box<[F; LENGTH]>> for NeuraVector<LENGTH, F> {
fn from(data: Box<[F; LENGTH]>) -> Self {
Self { data }
}
}
impl<const LENGTH: usize, F> From<NeuraVector<LENGTH, F>> for Box<[F; LENGTH]> {
fn from(vector: NeuraVector<LENGTH, F>) -> Self {
vector.data
}
}
impl<const LENGTH: usize, F: Default + Clone> From<&[F; LENGTH]> for NeuraVector<LENGTH, F> {
/// **Warning:** when using this function, make sure that the array is not allocated on the stack,
/// or that `LENGTH` is bounded.
fn from(data: &[F; LENGTH]) -> Self {
let mut res = Self::default();
for i in 0..LENGTH {
res.data[i] = data[i].clone();
}
res
}
}
impl<const LENGTH: usize, F> From<[F; LENGTH]> for NeuraVector<LENGTH, F> {
/// **Warning:** when using this function, make sure that `LENGTH` is bounded.
fn from(data: [F; LENGTH]) -> Self {
Self {
data: Box::new(data),
}
}
}
impl<const LENGTH: usize, F: Default + Clone> Default for NeuraVector<LENGTH, F> {
#[inline(always)]
fn default() -> Self {
Self::from_value(F::default())
}
}
impl<const LENGTH: usize, F> IntoIterator for NeuraVector<LENGTH, F> {
type Item = F;
type IntoIter = std::array::IntoIter<F, LENGTH>;
fn into_iter(self) -> Self::IntoIter {
self.data.into_iter()
}
}
impl<'a, const LENGTH: usize, F> IntoIterator for &'a NeuraVector<LENGTH, F> {
type Item = &'a F;
type IntoIter = std::slice::Iter<'a, F>;
fn into_iter(self) -> Self::IntoIter {
self.data.iter()
}
}
impl<'a, const LENGTH: usize, F> IntoIterator for &'a mut NeuraVector<LENGTH, F> {
type Item = &'a mut F;
type IntoIter = std::slice::IterMut<'a, F>;
fn into_iter(self) -> Self::IntoIter {
self.data.iter_mut()
}
}
impl<'a, const LENGTH: usize, F: Default + Clone> FromIterator<F> for NeuraVector<LENGTH, F> {
fn from_iter<T: IntoIterator<Item = F>>(iter: T) -> Self {
let mut res = Self::default();
let mut iter = iter.into_iter();
for i in 0..LENGTH {
if let Some(next) = iter.next() {
res[i] = next;
} else {
break;
}
}
res
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_reverse_dot() {
let left: NeuraVector<_, f64> = [2.0, 3.0, 5.0].into();
let right: NeuraVector<_, f64> = [7.0, 11.0, 13.0, 17.0].into();
let expected: NeuraMatrix<_, _, f64> = [
[14.0, 22.0, 26.0, 34.0],
[21.0, 33.0, 39.0, 51.0],
[35.0, 55.0, 65.0, 85.0],
]
.into();
let actual = left.reverse_dot(right);
assert_eq!(expected, actual);
}
}
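
A matching sketch for the vector type, exercising the operations the layers rely on (values made up):

```rust
use neuramethyst::algebra::NeuraVector;

fn main() {
    let a: NeuraVector<3, f64> = [1.0, 2.0, 3.0].into();
    let b: NeuraVector<3, f64> = [4.0, 5.0, 6.0].into();

    // Scalar product: 1*4 + 2*5 + 3*6 = 32.
    assert_eq!(a.dot(&b), 32.0);

    // Element-wise (Hadamard) product.
    let h = a.hadamard_product(&b);
    assert_eq!((h[0], h[1], h[2]), (4.0, 10.0, 18.0));

    // `reverse_dot` is the outer product used for the weight gradient in the dense layer:
    // result[(x, y)] = a[y] * b[x].
    let outer = a.reverse_dot(&b);
    assert_eq!(outer[(0, 2)], 3.0 * 4.0);

    // FromIterator fills from the front and leaves the remaining entries at the default value.
    let c: NeuraVector<4, f64> = [1.0, 2.0].into_iter().collect();
    assert_eq!((c[0], c[1], c[2], c[3]), (1.0, 2.0, 0.0, 0.0));
}
```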

@ -1,14 +1,16 @@
use crate::algebra::NeuraVector;
use super::NeuraLoss; use super::NeuraLoss;
#[derive(Clone, Copy, Debug, PartialEq)] #[derive(Clone, Copy, Debug, PartialEq)]
pub struct Euclidean<const N: usize>; pub struct Euclidean<const N: usize>;
impl<const N: usize> NeuraLoss for Euclidean<N> { impl<const N: usize> NeuraLoss for Euclidean<N> {
type Input = [f64; N]; type Input = NeuraVector<N, f64>;
type Target = [f64; N]; type Target = NeuraVector<N, f64>;
#[inline] #[inline]
fn eval(&self, target: &[f64; N], actual: &[f64; N]) -> f64 { fn eval(&self, target: &NeuraVector<N, f64>, actual: &NeuraVector<N, f64>) -> f64 {
let mut sum_squared = 0.0; let mut sum_squared = 0.0;
for i in 0..N { for i in 0..N {
@ -19,8 +21,12 @@ impl<const N: usize> NeuraLoss for Euclidean<N> {
} }
#[inline] #[inline]
fn nabla(&self, target: &[f64; N], actual: &[f64; N]) -> [f64; N] { fn nabla(
let mut res = [0.0; N]; &self,
target: &NeuraVector<N, f64>,
actual: &NeuraVector<N, f64>,
) -> NeuraVector<N, f64> {
let mut res = NeuraVector::default();
// ∂E(y)/∂yᵢ = yᵢ - yᵢ' // ∂E(y)/∂yᵢ = yᵢ - yᵢ'
for i in 0..N { for i in 0..N {
@ -57,8 +63,8 @@ impl<const N: usize> CrossEntropy<N> {
} }
impl<const N: usize> NeuraLoss for CrossEntropy<N> { impl<const N: usize> NeuraLoss for CrossEntropy<N> {
type Input = [f64; N]; type Input = NeuraVector<N, f64>;
type Target = [f64; N]; type Target = NeuraVector<N, f64>;
fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64 { fn eval(&self, target: &Self::Target, actual: &Self::Input) -> f64 {
let mut result = 0.0; let mut result = 0.0;
@ -71,7 +77,7 @@ impl<const N: usize> NeuraLoss for CrossEntropy<N> {
} }
fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input { fn nabla(&self, target: &Self::Target, actual: &Self::Input) -> Self::Input {
let mut result = [0.0; N]; let mut result = NeuraVector::default();
for i in 0..N { for i in 0..N {
result[i] = self.derivate_single(target[i], actual[i]); result[i] = self.derivate_single(target[i], actual[i]);
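
For concreteness, the gradient comment above, $\partial E(y)/\partial y_i = y_i - y'_i$, corresponds to $E(y) = \frac{1}{2}\sum_i (y_i - y'_i)^2$; the backpropagation test further down checks exactly this, evaluating a network output of 0.191 against a target of 1.0 and expecting $\frac{1}{2}(0.191 - 1.0)^2 \approx 0.327$.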

@ -1,8 +1,7 @@
use super::{NeuraLayer, NeuraTrainableLayer}; use super::{NeuraLayer, NeuraTrainableLayer};
use crate::{ use crate::{
algebra::NeuraVectorSpace, algebra::{NeuraMatrix, NeuraVector, NeuraVectorSpace},
derivable::NeuraDerivable, derivable::NeuraDerivable,
utils::{multiply_matrix_transpose_vector, multiply_matrix_vector, reverse_dot_product},
}; };
use rand::Rng; use rand::Rng;
@ -15,8 +14,8 @@ pub struct NeuraDenseLayer<
const INPUT_LEN: usize, const INPUT_LEN: usize,
const OUTPUT_LEN: usize, const OUTPUT_LEN: usize,
> { > {
weights: [[f64; INPUT_LEN]; OUTPUT_LEN], weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
bias: [f64; OUTPUT_LEN], bias: NeuraVector<OUTPUT_LEN, f64>,
activation: Act, activation: Act,
regularization: Reg, regularization: Reg,
} }
@ -29,8 +28,8 @@ impl<
> NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN> > NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{ {
pub fn new( pub fn new(
weights: [[f64; INPUT_LEN]; OUTPUT_LEN], weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
bias: [f64; OUTPUT_LEN], bias: NeuraVector<OUTPUT_LEN, f64>,
activation: Act, activation: Act,
regularization: Reg, regularization: Reg,
) -> Self { ) -> Self {
@ -43,7 +42,7 @@ impl<
} }
pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self { pub fn from_rng(rng: &mut impl Rng, activation: Act, regularization: Reg) -> Self {
let mut weights = [[0.0; INPUT_LEN]; OUTPUT_LEN]; let mut weights: NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64> = NeuraMatrix::from_value(0.0f64);
// Use Xavier (or He) initialisation, using the harmonic mean // Use Xavier (or He) initialisation, using the harmonic mean
// Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html // Ref: https://www.deeplearning.ai/ai-notes/initialization/index.html
@ -63,7 +62,7 @@ impl<
Self { Self {
weights, weights,
// Biases are initialized based on the activation's hint // Biases are initialized based on the activation's hint
bias: [activation.bias_hint(); OUTPUT_LEN], bias: NeuraVector::from_value(activation.bias_hint()),
activation, activation,
regularization, regularization,
} }
@ -77,12 +76,12 @@ impl<
const OUTPUT_LEN: usize, const OUTPUT_LEN: usize,
> NeuraLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN> > NeuraLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{ {
type Input = [f64; INPUT_LEN]; type Input = NeuraVector<INPUT_LEN, f64>;
type Output = [f64; OUTPUT_LEN]; type Output = NeuraVector<OUTPUT_LEN, f64>;
fn eval(&self, input: &Self::Input) -> Self::Output { fn eval(&self, input: &Self::Input) -> Self::Output {
let mut result = multiply_matrix_vector(&self.weights, input); let mut result = self.weights.multiply_vector(input);
for i in 0..OUTPUT_LEN { for i in 0..OUTPUT_LEN {
result[i] = self.activation.eval(result[i] + self.bias[i]); result[i] = self.activation.eval(result[i] + self.bias[i]);
@ -99,30 +98,33 @@ impl<
const OUTPUT_LEN: usize, const OUTPUT_LEN: usize,
> NeuraTrainableLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN> > NeuraTrainableLayer for NeuraDenseLayer<Act, Reg, INPUT_LEN, OUTPUT_LEN>
{ {
type Delta = ([[f64; INPUT_LEN]; OUTPUT_LEN], [f64; OUTPUT_LEN]); type Delta = (
NeuraMatrix<INPUT_LEN, OUTPUT_LEN, f64>,
NeuraVector<OUTPUT_LEN, f64>,
);
fn backpropagate( fn backpropagate(
&self, &self,
input: &Self::Input, input: &Self::Input,
epsilon: Self::Output, epsilon: Self::Output,
) -> (Self::Input, Self::Delta) { ) -> (Self::Input, Self::Delta) {
let evaluated = multiply_matrix_vector(&self.weights, input); let evaluated = self.weights.multiply_vector(input);
// Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron), // Compute delta (the input gradient of the neuron) from epsilon (the output gradient of the neuron),
// with `self.activation'(input) ° epsilon = delta` // with `self.activation'(input) ° epsilon = delta`
let mut delta = epsilon.clone(); let mut delta: NeuraVector<OUTPUT_LEN, f64> = epsilon.clone();
for i in 0..OUTPUT_LEN { for i in 0..OUTPUT_LEN {
delta[i] *= self.activation.derivate(evaluated[i]); delta[i] *= self.activation.derivate(evaluated[i]);
} }
// Compute the weight gradient // Compute the weight gradient
let weights_gradient = reverse_dot_product(&delta, input); let weights_gradient = delta.reverse_dot(input);
let new_epsilon = self.weights.transpose_multiply_vector(&delta);
// According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation // According to https://datascience.stackexchange.com/questions/20139/gradients-for-bias-terms-in-backpropagation
// The gradient of the bias is equal to the delta term of the backpropagation algorithm // The gradient of the bias is equal to the delta term of the backpropagation algorithm
let bias_gradient = delta; let bias_gradient = delta;
let new_epsilon = multiply_matrix_transpose_vector(&self.weights, &delta);
(new_epsilon, (weights_gradient, bias_gradient)) (new_epsilon, (weights_gradient, bias_gradient))
} }
@ -132,7 +134,7 @@ impl<
} }
fn regularize(&self) -> Self::Delta { fn regularize(&self) -> Self::Delta {
let mut res = ([[0.0; INPUT_LEN]; OUTPUT_LEN], [0.0; OUTPUT_LEN]); let mut res = Self::Delta::default();
for i in 0..OUTPUT_LEN { for i in 0..OUTPUT_LEN {
for j in 0..INPUT_LEN { for j in 0..INPUT_LEN {
@ -149,7 +151,10 @@ impl<
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;
use crate::derivable::{activation::Relu, regularize::NeuraL0}; use crate::{
derivable::{activation::Relu, regularize::NeuraL0},
utils::uniform_vector,
};
#[test] #[test]
fn test_from_rng() { fn test_from_rng() {
@ -160,6 +165,16 @@ mod test {
for x in 0..64 { for x in 0..64 {
input[x] = rng.gen(); input[x] = rng.gen();
} }
assert!(layer.eval(&input).len() == 32); assert!(layer.eval(&input.into()).len() == 32);
}
#[test]
fn test_stack_overflow_big_layer() {
let layer = NeuraDenseLayer::from_rng(&mut rand::thread_rng(), Relu, NeuraL0)
as NeuraDenseLayer<Relu, NeuraL0, 1000, 1000>;
layer.backpropagate(&uniform_vector(), uniform_vector());
<NeuraDenseLayer<Relu, NeuraL0, 1000, 1000> as NeuraTrainableLayer>::Delta::zero();
} }
} }
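
The new `test_stack_overflow_big_layer` test pins down the motivation for the whole commit: with the old representation, a 1000×1000 dense layer carried `[[f64; 1000]; 1000]` by value, i.e. 1000 × 1000 × 8 bytes = 8 MB of weights (plus as much again for the gradient in `Delta`), which does not fit in the 2 MiB stack that Rust's test threads get by default and barely fits a typical 8 MiB main-thread stack. With the boxed types, the layer struct itself is only a few pointers wide. A rough check of the sizes involved (a sketch, not part of the commit):

```rust
use std::mem::size_of;
use neuramethyst::algebra::{NeuraMatrix, NeuraVector};

fn main() {
    // Old representation: the whole weight matrix sits inline in the layer struct.
    assert_eq!(size_of::<[[f64; 1000]; 1000]>(), 8_000_000);

    // New representation: the struct only holds a Box; the 8 MB buffer is on the heap.
    assert_eq!(size_of::<NeuraMatrix<1000, 1000, f64>>(), size_of::<usize>());
    assert_eq!(size_of::<NeuraVector<1000, f64>>(), size_of::<usize>());
}
```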

@ -1,12 +1,14 @@
use rand::Rng; use rand::Rng;
use crate::algebra::NeuraVector;
use super::{NeuraLayer, NeuraTrainableLayer}; use super::{NeuraLayer, NeuraTrainableLayer};
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct NeuraDropoutLayer<const LENGTH: usize, R: Rng> { pub struct NeuraDropoutLayer<const LENGTH: usize, R: Rng> {
pub dropout_probability: f64, pub dropout_probability: f64,
multiplier: f64, multiplier: f64,
mask: [bool; LENGTH], mask: NeuraVector<LENGTH, bool>,
rng: R, rng: R,
} }
@ -15,12 +17,12 @@ impl<const LENGTH: usize, R: Rng> NeuraDropoutLayer<LENGTH, R> {
Self { Self {
dropout_probability, dropout_probability,
multiplier: 1.0, multiplier: 1.0,
mask: [false; LENGTH], mask: NeuraVector::from_value(false),
rng, rng,
} }
} }
fn apply_dropout(&self, vector: &mut [f64; LENGTH]) { fn apply_dropout(&self, vector: &mut NeuraVector<LENGTH, f64>) {
for (index, &dropout) in self.mask.iter().enumerate() { for (index, &dropout) in self.mask.iter().enumerate() {
if dropout { if dropout {
vector[index] = 0.0; vector[index] = 0.0;
@ -32,8 +34,8 @@ impl<const LENGTH: usize, R: Rng> NeuraDropoutLayer<LENGTH, R> {
} }
impl<const LENGTH: usize, R: Rng> NeuraLayer for NeuraDropoutLayer<LENGTH, R> { impl<const LENGTH: usize, R: Rng> NeuraLayer for NeuraDropoutLayer<LENGTH, R> {
type Input = [f64; LENGTH]; type Input = NeuraVector<LENGTH, f64>;
type Output = [f64; LENGTH]; type Output = NeuraVector<LENGTH, f64>;
fn eval(&self, input: &Self::Input) -> Self::Output { fn eval(&self, input: &Self::Input) -> Self::Output {
let mut result = input.clone(); let mut result = input.clone();
@ -83,7 +85,7 @@ impl<const LENGTH: usize, R: Rng> NeuraTrainableLayer for NeuraDropoutLayer<LENG
} }
fn cleanup(&mut self) { fn cleanup(&mut self) {
self.mask = [false; LENGTH]; self.mask = NeuraVector::from_value(false);
self.multiplier = 1.0; self.multiplier = 1.0;
} }
} }

@ -10,6 +10,12 @@ pub use softmax::NeuraSoftmaxLayer;
mod one_hot; mod one_hot;
pub use one_hot::NeuraOneHotLayer; pub use one_hot::NeuraOneHotLayer;
// mod reshape;
// pub use reshape::{
// NeuraFlattenLayer,
// NeuraReshapeLayer
// };
mod lock; mod lock;
pub use lock::NeuraLockLayer; pub use lock::NeuraLockLayer;
@ -98,4 +104,20 @@ macro_rules! neura_layer {
( "lock", $layer:expr ) => { ( "lock", $layer:expr ) => {
$crate::layer::NeuraLockLayer($layer) $crate::layer::NeuraLockLayer($layer)
}; };
// ( "flatten" ) => {
// $crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<_, _, f64>
// };
// ( "flatten", $width:expr, $height:expr ) => {
// $crate::layer::NeuraFlattenLayer::new() as $crate::layer::NeuraFlattenLayer<$width, $height, f64>
// };
// ( "reshape", $height:expr ) => {
// $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<_, $height, f64>
// };
// ( "reshape", $width:expr, $height:expr ) => {
// $crate::layer::NeuraReshapeLayer::new() as $crate::layer::NeuraReshapeLayer<$width, $height, f64>
// };
} }

@ -1,34 +1,32 @@
use crate::algebra::{NeuraMatrix, NeuraVector};
use super::{NeuraLayer, NeuraTrainableLayer}; use super::{NeuraLayer, NeuraTrainableLayer};
/// A special layer that allows you to split a vector into one-hot vectors /// A special layer that allows you to split a vector into one-hot vectors
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub struct NeuraOneHotLayer<const CATS: usize, const LENGTH: usize>; pub struct NeuraOneHotLayer<const CATS: usize, const LENGTH: usize>;
impl<const CATS: usize, const LENGTH: usize> NeuraLayer for NeuraOneHotLayer<CATS, LENGTH> impl<const CATS: usize, const LENGTH: usize> NeuraLayer for NeuraOneHotLayer<CATS, LENGTH> {
where type Input = NeuraVector<LENGTH, f64>;
[(); LENGTH * CATS]: Sized, type Output = NeuraMatrix<LENGTH, CATS, f64>;
{
type Input = [f64; LENGTH];
type Output = [f64; LENGTH * CATS];
fn eval(&self, input: &Self::Input) -> Self::Output { fn eval(&self, input: &Self::Input) -> Self::Output {
let mut res = [0.0; LENGTH * CATS]; let mut res = NeuraMatrix::default();
for i in 0..LENGTH { for i in 0..LENGTH {
let cat_low = input[i].floor().max(0.0).min(CATS as f64 - 2.0); let cat_low = input[i].floor().max(0.0).min(CATS as f64 - 2.0);
let amount = (input[i] - cat_low).max(0.0).min(1.0); let amount = (input[i] - cat_low).max(0.0).min(1.0);
let cat_low = cat_low as usize; let cat_low = cat_low as usize;
res[i * LENGTH + cat_low] = 1.0 - amount; res[i][cat_low] = 1.0 - amount;
res[i * LENGTH + cat_low + 1] = amount; res[i][cat_low + 1] = amount;
} }
res res
} }
} }
impl<const CATS: usize, const LENGTH: usize> NeuraTrainableLayer for NeuraOneHotLayer<CATS, LENGTH> impl<const CATS: usize, const LENGTH: usize> NeuraTrainableLayer
where for NeuraOneHotLayer<CATS, LENGTH>
[(); LENGTH * CATS]: Sized,
{ {
type Delta = (); type Delta = ();
@ -37,11 +35,11 @@ where
input: &Self::Input, input: &Self::Input,
epsilon: Self::Output, epsilon: Self::Output,
) -> (Self::Input, Self::Delta) { ) -> (Self::Input, Self::Delta) {
let mut res = [0.0; LENGTH]; let mut res = NeuraVector::default();
for i in 0..LENGTH { for i in 0..LENGTH {
let cat_low = input[i].floor().max(0.0).min(CATS as f64 - 2.0) as usize; let cat_low = input[i].floor().max(0.0).min(CATS as f64 - 2.0) as usize;
let epsilon = -epsilon[i * LENGTH + cat_low] + epsilon[i * LENGTH + cat_low + 1]; let epsilon = -epsilon[i][cat_low] + epsilon[i][cat_low + 1];
// Scale epsilon by how many entries were ignored // Scale epsilon by how many entries were ignored
res[i] = epsilon * CATS as f64 / 2.0; res[i] = epsilon * CATS as f64 / 2.0;
} }

@ -0,0 +1,147 @@
//! This module is currently disabled, as it relies on `generic_const_exprs`, which is too unstable to use as of now
use super::{NeuraLayer, NeuraTrainableLayer};
/// Converts a `[[T; WIDTH]; HEIGHT]` into a `[T; WIDTH * HEIGHT]`.
/// Requires the `#![feature(generic_const_exprs)]` feature to be enabled.
pub struct NeuraFlattenLayer<const WIDTH: usize, const HEIGHT: usize, T> {
phantom: std::marker::PhantomData<T>,
}
/// Converts a `[T; WIDTH * HEIGHT]` into a `[[T; WIDTH]; HEIGHT]`.
/// Requires the `#![feature(generic_const_exprs)]` feature to be enabled.
pub struct NeuraReshapeLayer<const WIDTH: usize, const HEIGHT: usize, T> {
phantom: std::marker::PhantomData<T>,
}
#[inline(always)]
fn flatten<const WIDTH: usize, const HEIGHT: usize, T: Copy + Default>(
input: &[[T; WIDTH]; HEIGHT],
) -> [T; WIDTH * HEIGHT]
where
[T; WIDTH * HEIGHT]: Sized,
{
let mut res = [T::default(); WIDTH * HEIGHT];
// Hopefully the optimizer realizes this can be all optimized away
for i in 0..HEIGHT {
for j in 0..WIDTH {
res[i * WIDTH + j] = input[i][j];
}
}
res
}
#[inline(always)]
fn reshape<const WIDTH: usize, const HEIGHT: usize, T: Copy + Default>(
input: &[T; WIDTH * HEIGHT],
) -> [[T; WIDTH]; HEIGHT]
where
[T; WIDTH * HEIGHT]: Sized,
{
let mut res = [[T::default(); WIDTH]; HEIGHT];
// Hopefully the optimizer realizes this can be all optimized away
for i in 0..HEIGHT {
for j in 0..WIDTH {
res[i][j] = input[i * WIDTH + j];
}
}
res
}
impl<const WIDTH: usize, const HEIGHT: usize, T> NeuraFlattenLayer<WIDTH, HEIGHT, T> {
pub fn new() -> Self {
Self {
phantom: std::marker::PhantomData,
}
}
}
impl<const WIDTH: usize, const HEIGHT: usize, T> NeuraReshapeLayer<WIDTH, HEIGHT, T> {
pub fn new() -> Self {
Self {
phantom: std::marker::PhantomData,
}
}
}
impl<const WIDTH: usize, const HEIGHT: usize, T: Copy + Default> NeuraLayer
for NeuraFlattenLayer<WIDTH, HEIGHT, T>
where
[T; WIDTH * HEIGHT]: Sized,
{
type Input = [[T; WIDTH]; HEIGHT];
type Output = [T; WIDTH * HEIGHT];
#[inline(always)]
fn eval(&self, input: &Self::Input) -> Self::Output {
flatten(input)
}
}
impl<const WIDTH: usize, const HEIGHT: usize, T: Copy + Default> NeuraLayer
for NeuraReshapeLayer<WIDTH, HEIGHT, T>
where
[T; WIDTH * HEIGHT]: Sized,
{
type Input = [T; WIDTH * HEIGHT];
type Output = [[T; WIDTH]; HEIGHT];
#[inline(always)]
fn eval(&self, input: &Self::Input) -> Self::Output {
reshape(input)
}
}
impl<const WIDTH: usize, const HEIGHT: usize, T: Copy + Default> NeuraTrainableLayer
for NeuraFlattenLayer<WIDTH, HEIGHT, T>
where
[T; WIDTH * HEIGHT]: Sized,
{
type Delta = ();
fn backpropagate(
&self,
_input: &Self::Input,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta) {
(reshape(&epsilon), ())
}
fn regularize(&self) -> Self::Delta {
todo!()
}
fn apply_gradient(&mut self, _gradient: &Self::Delta) {
// Noop
}
}
impl<const WIDTH: usize, const HEIGHT: usize, T: Copy + Default> NeuraTrainableLayer
for NeuraReshapeLayer<WIDTH, HEIGHT, T>
where
[T; WIDTH * HEIGHT]: Sized,
{
type Delta = ();
fn backpropagate(
&self,
_input: &Self::Input,
epsilon: Self::Output,
) -> (Self::Input, Self::Delta) {
(flatten(&epsilon), ())
}
fn regularize(&self) -> Self::Delta {
todo!()
}
fn apply_gradient(&mut self, _gradient: &Self::Delta) {
// Noop
}
}

@ -1,4 +1,4 @@
use crate::utils::multiply_vectors_pointwise; use crate::algebra::NeuraVector;
use super::{NeuraLayer, NeuraTrainableLayer}; use super::{NeuraLayer, NeuraTrainableLayer};
@ -13,16 +13,16 @@ impl<const LENGTH: usize> NeuraSoftmaxLayer<LENGTH> {
} }
impl<const LENGTH: usize> NeuraLayer for NeuraSoftmaxLayer<LENGTH> { impl<const LENGTH: usize> NeuraLayer for NeuraSoftmaxLayer<LENGTH> {
type Input = [f64; LENGTH]; type Input = NeuraVector<LENGTH, f64>;
type Output = [f64; LENGTH]; type Output = NeuraVector<LENGTH, f64>;
fn eval(&self, input: &Self::Input) -> Self::Output { fn eval(&self, input: &Self::Input) -> Self::Output {
let mut res = input.clone(); let mut res: Self::Input = input.clone();
let mut max = 0.0; let mut max = 0.0;
for item in &res { for &item in &res {
if *item > max { if item > max {
max = *item; max = item;
} }
} }
@ -55,10 +55,10 @@ impl<const LENGTH: usize> NeuraTrainableLayer for NeuraSoftmaxLayer<LENGTH> {
let evaluated = self.eval(input); let evaluated = self.eval(input);
// Compute $a_{l-1,i} \epsilon_{l,i}$ // Compute $a_{l-1,i} \epsilon_{l,i}$
epsilon = multiply_vectors_pointwise(&epsilon, &evaluated); epsilon = epsilon.hadamard_product(&evaluated);
// Compute $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$ // Compute $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$
let sum_diagonal_terms: f64 = epsilon.iter().copied().sum(); let sum_diagonal_terms: f64 = epsilon.iter().sum();
for i in 0..LENGTH { for i in 0..LENGTH {
// Multiply $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$ by $a_{l-1,i}$ and add it to $a_{l-1,i} \epsilon_{l,i}$ // Multiply $\sum_{k}{a_{l-1,k} \epsilon_{l,k}}$ by $a_{l-1,i}$ and add it to $a_{l-1,i} \epsilon_{l,i}$
@ -79,10 +79,8 @@ impl<const LENGTH: usize> NeuraTrainableLayer for NeuraSoftmaxLayer<LENGTH> {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::algebra::NeuraVectorSpace; use crate::algebra::{NeuraMatrix, NeuraVectorSpace};
use crate::utils::{ use crate::utils::uniform_vector;
matrix_from_diagonal, multiply_matrix_vector, reverse_dot_product, uniform_vector,
};
use super::*; use super::*;
@ -91,7 +89,7 @@ mod test {
const EPSILON: f64 = 0.000002; const EPSILON: f64 = 0.000002;
let layer = NeuraSoftmaxLayer::new() as NeuraSoftmaxLayer<3>; let layer = NeuraSoftmaxLayer::new() as NeuraSoftmaxLayer<3>;
let result = layer.eval(&[1.0, 2.0, 8.0]); let result = layer.eval(&[1.0, 2.0, 8.0].into());
assert!((result[0] - 0.0009088).abs() < EPSILON); assert!((result[0] - 0.0009088).abs() < EPSILON);
assert!((result[1] - 0.0024704).abs() < EPSILON); assert!((result[1] - 0.0024704).abs() < EPSILON);
@ -113,7 +111,7 @@ mod test {
for epsilon2 in [2.9, 3.1, 3.7] { for epsilon2 in [2.9, 3.1, 3.7] {
let epsilon = [epsilon1, epsilon2]; let epsilon = [epsilon1, epsilon2];
let (epsilon, _) = layer.backpropagate(&input, epsilon); let (epsilon, _) = layer.backpropagate(&input.into(), epsilon.into());
let expected = [ let expected = [
output[0] * (1.0 - output[0]) * epsilon1 output[0] * (1.0 - output[0]) * epsilon1
- output[1] * output[0] * epsilon2, - output[1] * output[0] * epsilon2,
@ -136,15 +134,15 @@ mod test {
let layer = NeuraSoftmaxLayer::new() as NeuraSoftmaxLayer<4>; let layer = NeuraSoftmaxLayer::new() as NeuraSoftmaxLayer<4>;
for _ in 0..100 { for _ in 0..100 {
let input: [f64; 4] = uniform_vector(); let input = uniform_vector::<4>();
let evaluated = layer.eval(&input); let evaluated = layer.eval(&input);
let loss: [f64; 4] = uniform_vector(); let loss = uniform_vector::<4>();
let mut derivative = reverse_dot_product(&evaluated, &evaluated); let mut derivative = evaluated.reverse_dot(&evaluated);
derivative.mul_assign(-1.0); derivative.mul_assign(-1.0);
derivative.add_assign(&matrix_from_diagonal(&evaluated)); derivative.add_assign(&NeuraMatrix::from_diagonal(&evaluated));
let expected = multiply_matrix_vector(&derivative, &loss); let expected = derivative.multiply_vector(&loss);
let (actual, _) = layer.backpropagate(&input, loss); let (actual, _) = layer.backpropagate(&input, loss);
for i in 0..4 { for i in 0..4 {
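
For reference, the Jacobian that this test now builds with `reverse_dot` and `NeuraMatrix::from_diagonal` is the standard softmax derivative: $\frac{\partial s_i}{\partial x_j} = s_i(\delta_{ij} - s_j)$, i.e. $J = \mathrm{diag}(s) - s s^{\top}$. Since $J$ is symmetric, `derivative.multiply_vector(&loss)` is exactly the input gradient that `backpropagate` computes directly above.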

@ -1,6 +1,6 @@
#![feature(generic_arg_infer)] #![feature(generic_arg_infer)]
#![feature(generic_associated_types)] #![feature(generic_associated_types)]
#![feature(generic_const_exprs)] // #![feature(generic_const_exprs)]
pub mod algebra; pub mod algebra;
pub mod derivable; pub mod derivable;
@ -11,7 +11,7 @@ pub mod train;
mod utils; mod utils;
// TODO: move to a different file // TODO: move to a different file
pub use utils::{argmax, one_hot}; pub use utils::{argmax, cycle_shuffling, one_hot};
pub mod prelude { pub mod prelude {
// Macros // Macros
@ -21,5 +21,4 @@ pub mod prelude {
pub use crate::layer::{NeuraDenseLayer, NeuraDropoutLayer, NeuraLayer}; pub use crate::layer::{NeuraDenseLayer, NeuraDropoutLayer, NeuraLayer};
pub use crate::network::sequential::{NeuraSequential, NeuraSequentialTail}; pub use crate::network::sequential::{NeuraSequential, NeuraSequentialTail};
pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer}; pub use crate::train::{NeuraBackprop, NeuraBatchedTrainer};
pub use crate::utils::cycle_shuffling;
} }

@ -8,7 +8,7 @@ use super::NeuraTrainableNetwork;
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct NeuraSequential<Layer: NeuraLayer, ChildNetwork> { pub struct NeuraSequential<Layer: NeuraLayer, ChildNetwork> {
pub layer: Layer, pub layer: Layer,
pub child_network: ChildNetwork, pub child_network: Box<ChildNetwork>,
} }
/// Operations on the tail end of a sequential network /// Operations on the tail end of a sequential network
@ -24,7 +24,7 @@ impl<Layer: NeuraLayer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> {
pub fn new(layer: Layer, child_network: ChildNetwork) -> Self { pub fn new(layer: Layer, child_network: ChildNetwork) -> Self {
Self { Self {
layer, layer,
child_network, child_network: Box::new(child_network),
} }
} }
@ -36,13 +36,13 @@ impl<Layer: NeuraLayer, ChildNetwork> NeuraSequential<Layer, ChildNetwork> {
} }
pub fn trim_front(self) -> ChildNetwork { pub fn trim_front(self) -> ChildNetwork {
self.child_network *self.child_network
} }
pub fn push_front<T: NeuraLayer>(self, layer: T) -> NeuraSequential<T, Self> { pub fn push_front<T: NeuraLayer>(self, layer: T) -> NeuraSequential<T, Self> {
NeuraSequential { NeuraSequential {
layer: layer, layer: layer,
child_network: self, child_network: Box::new(self),
} }
} }
} }
@ -59,10 +59,10 @@ impl<Layer: NeuraLayer> NeuraSequentialTail for NeuraSequential<Layer, ()> {
fn push_tail<T: NeuraLayer>(self, layer: T) -> Self::TailPushed<T> { fn push_tail<T: NeuraLayer>(self, layer: T) -> Self::TailPushed<T> {
NeuraSequential { NeuraSequential {
layer: self.layer, layer: self.layer,
child_network: NeuraSequential { child_network: Box::new(NeuraSequential {
layer, layer,
child_network: (), child_network: Box::new(()),
}, }),
} }
} }
} }
@ -78,14 +78,14 @@ impl<Layer: NeuraLayer, ChildNetwork: NeuraSequentialTail> NeuraSequentialTail
fn trim_tail(self) -> Self::TailTrimmed { fn trim_tail(self) -> Self::TailTrimmed {
NeuraSequential { NeuraSequential {
layer: self.layer, layer: self.layer,
child_network: self.child_network.trim_tail(), child_network: Box::new(self.child_network.trim_tail()),
} }
} }
fn push_tail<T: NeuraLayer>(self, layer: T) -> Self::TailPushed<T> { fn push_tail<T: NeuraLayer>(self, layer: T) -> Self::TailPushed<T> {
NeuraSequential { NeuraSequential {
layer: self.layer, layer: self.layer,
child_network: self.child_network.push_tail(layer), child_network: Box::new(self.child_network.push_tail(layer)),
} }
} }
} }
@ -145,7 +145,7 @@ impl<Layer: NeuraTrainableLayer> NeuraTrainableNetwork for NeuraSequential<Layer
impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainableNetwork<Input = Layer::Output>> impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainableNetwork<Input = Layer::Output>>
NeuraTrainableNetwork for NeuraSequential<Layer, ChildNetwork> NeuraTrainableNetwork for NeuraSequential<Layer, ChildNetwork>
{ {
type Delta = (Layer::Delta, ChildNetwork::Delta); type Delta = (Layer::Delta, Box<ChildNetwork::Delta>);
fn apply_gradient(&mut self, gradient: &Self::Delta) { fn apply_gradient(&mut self, gradient: &Self::Delta) {
self.layer.apply_gradient(&gradient.0); self.layer.apply_gradient(&gradient.0);
@ -165,11 +165,17 @@ impl<Layer: NeuraTrainableLayer, ChildNetwork: NeuraTrainableNetwork<Input = Lay
let (backprop_gradient, layer_gradient) = let (backprop_gradient, layer_gradient) =
self.layer.backpropagate(input, backprop_gradient); self.layer.backpropagate(input, backprop_gradient);
(backprop_gradient, (layer_gradient, weights_gradient)) (
backprop_gradient,
(layer_gradient, Box::new(weights_gradient)),
)
} }
fn regularize(&self) -> Self::Delta { fn regularize(&self) -> Self::Delta {
(self.layer.regularize(), self.child_network.regularize()) (
self.layer.regularize(),
Box::new(self.child_network.regularize()),
)
} }
fn prepare_epoch(&mut self) { fn prepare_epoch(&mut self) {
@ -187,7 +193,7 @@ impl<Layer: NeuraLayer> From<Layer> for NeuraSequential<Layer, ()> {
fn from(layer: Layer) -> Self { fn from(layer: Layer) -> Self {
Self { Self {
layer, layer,
child_network: (), child_network: Box::new(()),
} }
} }
} }
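
Boxing `child_network` (and each gradient level, via `Box<ChildNetwork::Delta>`) matters because `neura_sequential!` builds a recursively nested type, so before this change every layer's weights and every per-layer gradient were stored inline in one ever-growing value. A sketch of the nesting, built the same way as the backpropagation test further down (weights made up):

```rust
use neuramethyst::derivable::{activation::Linear, regularize::NeuraL0};
use neuramethyst::prelude::*;

fn main() {
    // The resulting type nests roughly as
    // NeuraSequential<NeuraDenseLayer<.., 2, 2>, NeuraSequential<NeuraDenseLayer<.., 2, 1>, ()>>,
    // and after this commit each level stores its child behind
    // `child_network: Box<ChildNetwork>`, so the value on the stack stays small
    // no matter how many layers are stacked.
    let network = neura_sequential![
        NeuraDenseLayer::new(
            [[0.11, 0.21], [0.12, 0.08]].into(),
            [0.0; 2].into(),
            Linear,
            NeuraL0
        ),
        NeuraDenseLayer::new([[0.14, 0.15]].into(), [0.0].into(), Linear, NeuraL0)
    ];

    // `trim_front` now unboxes the child (`*self.child_network`) to return the tail network.
    let _tail = network.trim_front();
}
```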

@ -1,5 +1,5 @@
use crate::{ use crate::{
algebra::NeuraVectorSpace, algebra::{NeuraVector, NeuraVectorSpace},
derivable::NeuraLoss, derivable::NeuraLoss,
layer::NeuraLayer, layer::NeuraLayer,
network::{sequential::NeuraSequential, NeuraTrainableNetwork}, network::{sequential::NeuraSequential, NeuraTrainableNetwork},
@ -38,8 +38,8 @@ impl<Loss: NeuraLoss + Clone> NeuraBackprop<Loss> {
} }
} }
impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone> impl<const N: usize, Loss: NeuraLoss<Input = NeuraVector<N, f64>> + Clone>
NeuraGradientSolver<[f64; N], Loss::Target> for NeuraBackprop<Loss> NeuraGradientSolver<NeuraVector<N, f64>, Loss::Target> for NeuraBackprop<Loss>
{ {
fn get_gradient<Layer: NeuraLayer, ChildNetwork>( fn get_gradient<Layer: NeuraLayer, ChildNetwork>(
&self, &self,
@ -49,7 +49,7 @@ impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone>
) -> <NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta ) -> <NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta
where where
NeuraSequential<Layer, ChildNetwork>: NeuraSequential<Layer, ChildNetwork>:
NeuraTrainableNetwork<Input = Layer::Input, Output = [f64; N]>, NeuraTrainableNetwork<Input = Layer::Input, Output = NeuraVector<N, f64>>,
{ {
trainable.backpropagate(input, target, self.loss.clone()).1 trainable.backpropagate(input, target, self.loss.clone()).1
} }
@ -62,7 +62,7 @@ impl<const N: usize, Loss: NeuraLoss<Input = [f64; N]> + Clone>
) -> f64 ) -> f64
where where
NeuraSequential<Layer, ChildNetwork>: NeuraSequential<Layer, ChildNetwork>:
NeuraTrainableNetwork<Input = Layer::Input, Output = [f64; N]>, NeuraTrainableNetwork<Input = Layer::Input, Output = NeuraVector<N, f64>>,
{ {
let output = trainable.eval(&input); let output = trainable.eval(&input);
self.loss.eval(target, &output) self.loss.eval(target, &output)
@ -146,15 +146,17 @@ impl NeuraBatchedTrainer {
// Contains `momentum_factor * factor * gradient_sum_previous_iter` // Contains `momentum_factor * factor * gradient_sum_previous_iter`
let mut previous_gradient_sum = let mut previous_gradient_sum =
<NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta::zero(); Box::<<NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta>::zero();
'd: for iteration in 0..self.iterations { 'd: for iteration in 0..self.iterations {
let mut gradient_sum = let mut gradient_sum = Box::<
<NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta::zero(); <NeuraSequential<Layer, ChildNetwork> as NeuraTrainableNetwork>::Delta,
>::zero();
network.prepare_epoch(); network.prepare_epoch();
for _ in 0..self.batch_size { for _ in 0..self.batch_size {
if let Some((input, target)) = iter.next() { if let Some((input, target)) = iter.next() {
let gradient = gradient_solver.get_gradient(&network, &input, &target); let gradient =
Box::new(gradient_solver.get_gradient(&network, &input, &target));
gradient_sum.add_assign(&gradient); gradient_sum.add_assign(&gradient);
} else { } else {
break 'd; break 'd;
@ -164,7 +166,7 @@ impl NeuraBatchedTrainer {
gradient_sum.mul_assign(factor); gradient_sum.mul_assign(factor);
// Add regularization gradient // Add regularization gradient
let mut reg_gradient = network.regularize(); let mut reg_gradient = Box::new(network.regularize());
reg_gradient.mul_assign(reg_factor); reg_gradient.mul_assign(reg_factor);
gradient_sum.add_assign(&reg_gradient); gradient_sum.add_assign(&reg_gradient);
@ -207,12 +209,15 @@ mod test {
for wa in [0.0, 0.25, 0.5, 1.0] { for wa in [0.0, 0.25, 0.5, 1.0] {
for wb in [0.0, 0.25, 0.5, 1.0] { for wb in [0.0, 0.25, 0.5, 1.0] {
let network = NeuraSequential::new( let network = NeuraSequential::new(
NeuraDenseLayer::new([[wa, wb]], [0.0], Linear, NeuraL0), NeuraDenseLayer::new([[wa, wb]].into(), [0.0].into(), Linear, NeuraL0),
(), (),
); );
let gradient = let gradient = NeuraBackprop::new(Euclidean).get_gradient(
NeuraBackprop::new(Euclidean).get_gradient(&network, &[1.0, 1.0], &[0.0]); &network,
&[1.0, 1.0].into(),
&[0.0].into(),
);
let expected = wa + wb; let expected = wa + wb;
assert!((gradient.0[0][0] - expected) < 0.001); assert!((gradient.0[0][0] - expected) < 0.001);
@ -226,24 +231,33 @@ mod test {
const EPSILON: f64 = 0.00001; const EPSILON: f64 = 0.00001;
// Test that we get the same values as https://hmkcode.com/ai/backpropagation-step-by-step/ // Test that we get the same values as https://hmkcode.com/ai/backpropagation-step-by-step/
let network = neura_sequential![ let network = neura_sequential![
NeuraDenseLayer::new([[0.11, 0.21], [0.12, 0.08]], [0.0; 2], Linear, NeuraL0), NeuraDenseLayer::new(
NeuraDenseLayer::new([[0.14, 0.15]], [0.0], Linear, NeuraL0) [[0.11, 0.21], [0.12, 0.08]].into(),
[0.0; 2].into(),
Linear,
NeuraL0
),
NeuraDenseLayer::new([[0.14, 0.15]].into(), [0.0].into(), Linear, NeuraL0)
]; ];
let input = [2.0, 3.0]; let input = [2.0, 3.0];
let target = [1.0]; let target = [1.0];
let intermediary = network.clone().trim_tail().eval(&input); let intermediary = network.clone().trim_tail().eval(&input.into());
assert_approx!(0.85, intermediary[0], EPSILON); assert_approx!(0.85, intermediary[0], EPSILON);
assert_approx!(0.48, intermediary[1], EPSILON); assert_approx!(0.48, intermediary[1], EPSILON);
assert_approx!(0.191, network.eval(&input)[0], EPSILON); assert_approx!(0.191, network.eval(&input.into())[0], EPSILON);
assert_approx!(0.327, Euclidean.eval(&target, &network.eval(&input)), 0.001); assert_approx!(
0.327,
Euclidean.eval(&target.into(), &network.eval(&input.into())),
0.001
);
let delta = network.eval(&input)[0] - target[0]; let delta = network.eval(&input.into())[0] - target[0];
let (gradient_first, gradient_second) = let (gradient_first, gradient_second) =
NeuraBackprop::new(Euclidean).get_gradient(&network, &input, &target); NeuraBackprop::new(Euclidean).get_gradient(&network, &input.into(), &target.into());
let gradient_first = gradient_first.0; let gradient_first = gradient_first.0;
let gradient_second = gradient_second.0[0]; let gradient_second = gradient_second.0[0];

@ -1,79 +1,4 @@
pub(crate) fn multiply_matrix_vector<const WIDTH: usize, const HEIGHT: usize>( use crate::algebra::NeuraVector;
matrix: &[[f64; WIDTH]; HEIGHT],
vector: &[f64; WIDTH],
) -> [f64; HEIGHT] {
let mut result = [0.0; HEIGHT];
for i in 0..HEIGHT {
let mut sum = 0.0;
for k in 0..WIDTH {
sum += matrix[i][k] * vector[k];
}
result[i] = sum;
}
result
}
/// Equivalent to `multiply_matrix_vector(transpose(matrix), vector)`.
pub(crate) fn multiply_matrix_transpose_vector<const WIDTH: usize, const HEIGHT: usize>(
matrix: &[[f64; WIDTH]; HEIGHT],
vector: &[f64; HEIGHT],
) -> [f64; WIDTH] {
let mut result = [0.0; WIDTH];
for i in 0..WIDTH {
let mut sum = 0.0;
for k in 0..HEIGHT {
sum += matrix[k][i] * vector[k];
}
result[i] = sum;
}
result
}
// Returns $left^{\top} \cdot right$, ie. $\ket{left} \bra{right}$
pub(crate) fn reverse_dot_product<const WIDTH: usize, const HEIGHT: usize>(
left: &[f64; HEIGHT],
right: &[f64; WIDTH],
) -> [[f64; WIDTH]; HEIGHT] {
let mut result = [[0.0; WIDTH]; HEIGHT];
for i in 0..HEIGHT {
for j in 0..WIDTH {
result[i][j] = left[i] * right[j];
}
}
result
}
pub(crate) fn multiply_vectors_pointwise<const LENGTH: usize>(
left: &[f64; LENGTH],
right: &[f64; LENGTH],
) -> [f64; LENGTH] {
let mut result = [0.0; LENGTH];
for i in 0..LENGTH {
result[i] = left[i] * right[i];
}
result
}
#[cfg(test)]
pub(crate) fn matrix_from_diagonal<const LENGTH: usize>(
vector: &[f64; LENGTH],
) -> [[f64; LENGTH]; LENGTH] {
let mut result = [[0.0; LENGTH]; LENGTH];
for i in 0..LENGTH {
result[i][i] = vector[i];
}
result
}
#[allow(dead_code)] #[allow(dead_code)]
pub(crate) fn assign_add_vector<const N: usize>(sum: &mut [f64; N], operand: &[f64; N]) { pub(crate) fn assign_add_vector<const N: usize>(sum: &mut [f64; N], operand: &[f64; N]) {
@ -164,9 +89,10 @@ where
} }
#[cfg(test)] #[cfg(test)]
pub(crate) fn uniform_vector<const LENGTH: usize>() -> [f64; LENGTH] { pub(crate) fn uniform_vector<const LENGTH: usize>() -> NeuraVector<LENGTH, f64> {
use rand::Rng; use rand::Rng;
let mut res = [0.0; LENGTH];
let mut res: NeuraVector<LENGTH, f64> = NeuraVector::default();
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
for i in 0..LENGTH { for i in 0..LENGTH {
@ -176,8 +102,8 @@ pub(crate) fn uniform_vector<const LENGTH: usize>() -> [f64; LENGTH] {
res res
} }
pub fn one_hot<const N: usize>(value: usize) -> [f64; N] { pub fn one_hot<const N: usize>(value: usize) -> NeuraVector<N, f64> {
let mut res = [0.0; N]; let mut res = NeuraVector::default();
if value < N { if value < N {
res[value] = 1.0; res[value] = 1.0;
} }
@ -196,25 +122,6 @@ pub fn argmax(array: &[f64]) -> usize {
res res
} }
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_reverse_dot_product() {
let left = [2.0, 3.0, 5.0];
let right = [7.0, 11.0, 13.0, 17.0];
let expected = [
[14.0, 22.0, 26.0, 34.0],
[21.0, 33.0, 39.0, 51.0],
[35.0, 55.0, 65.0, 85.0],
];
assert_eq!(expected, reverse_dot_product(&left, &right));
}
}
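
`one_hot` now returns a `NeuraVector` instead of a plain array, while `argmax` still takes a slice, which is why the bivariate example above calls `argmax(network.eval(&input).as_ref())`. A small sketch of the pair (label value made up):

```rust
use neuramethyst::algebra::NeuraVector;
use neuramethyst::{argmax, one_hot};

fn main() {
    // A length-10 one-hot encoding of the label 3: zeros everywhere except index 3.
    let label: NeuraVector<10, f64> = one_hot::<10>(3);
    assert_eq!(label[3], 1.0);

    // argmax works on any &[f64]; NeuraVector provides the slice via AsRef<[f64]>.
    let as_slice: &[f64] = label.as_ref();
    assert_eq!(argmax(as_slice), 3);
}
```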
#[cfg(test)] #[cfg(test)]
#[macro_export] #[macro_export]
macro_rules! assert_approx { macro_rules! assert_approx {
