Add NeuraIsolateLayer, for more versatility in resnets

main
Shad Amethyst 2 years ago
parent 872cb3a6ce
commit 972b177767
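In short, the new NeuraIsolateLayer slices the sub-range [start, end) out of a vector input, which is what lets a residual network route, say, the image part and the one-hot label part of a concatenated input to different layers (see the mnist-diffusion example further down). Below is a rough standalone sketch of that behavior — not part of this commit, and assuming the prelude re-exports used by the examples (`neura_layer!`, `NeuraShape`, `construct`, `eval`); the bounds and sizes are made up for illustration:

```rust
use nalgebra::dvector;
use neuramethyst::prelude::*;

// Hypothetical bounds, just for illustration.
const START: usize = 2;
const END: usize = 5;

fn main() {
    // Keep only the components in [START, END) of a 6-dimensional input.
    let layer = neura_layer!("isolate", START, END)
        .construct(NeuraShape::Vector(6))
        .unwrap();

    let output = layer.eval(&dvector![0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0]);
    assert_eq!(output, dvector![2.0, 3.0, 4.0]);
}
```

The `neura_layer!("isolate", start, end)` arm added in this commit simply forwards to `NeuraIsolateLayer::new(start, end).unwrap()`, and `construct` then checks that the range is in bounds and in order for the given input shape.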

@ -5,6 +5,9 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
visualization = ["dep:image", "dep:viuer"]
[dependencies]
boxed-array = "0.1.0"
nalgebra = { version = "^0.32", features = ["std", "macros", "rand", "serde-serialize"] }
@ -14,6 +17,8 @@ num = "^0.4"
rand = "^0.8"
rand_distr = "0.4.3"
textplots = "0.8.0"
image = { version = "0.24.6", optional = true }
viuer = { version = "0.6.2", optional = true }
[dev-dependencies]
image = "0.24.6"
@ -24,3 +29,15 @@ approx = "0.5.1"
[profile.release]
debug = true
[[example]]
name = "densenet"
required-features = ["visualization"]
[[example]]
name = "mnist-decoder"
required-features = ["visualization"]
[[example]]
name = "mnist-diffusion"
required-features = ["visualization"]

@ -51,16 +51,16 @@ fn main() {
&test_inputs,
);
- draw_neuron_activation(
+ neuramethyst::draw_neuron_activation(
|input| {
- let output = network.eval(&dvector![input[0] as f32, input[1] as f32]);
+ let output = network.eval(&dvector![input[0], input[1]]);
let estimation = output[0] / (output[0] + output[1]);
- let color = network.eval(&dvector![input[0] as f32, input[1] as f32]);
+ let color = network.eval(&dvector![input[0], input[1]]);
(&color / color.map(|x| x * x).sum() * estimation)
.into_iter()
- .map(|x| x.abs() as f64)
+ .map(|x| x.abs() as f32)
.collect::<Vec<_>>()
},
6.0, 6.0,
@ -96,42 +96,6 @@ fn main() {
}
}
// TODO: move this to the library?
fn draw_neuron_activation<F: Fn([f64; 2]) -> Vec<f64>>(callback: F, scale: f64) {
use viuer::Config;
const WIDTH: u32 = 64;
const HEIGHT: u32 = 64;
let mut image = image::RgbImage::new(WIDTH, HEIGHT);
fn sigmoid(x: f64) -> f64 {
1.9 / (1.0 + (-x * 3.0).exp()) - 0.9
}
for y in 0..HEIGHT {
let y2 = 2.0 * y as f64 / HEIGHT as f64 - 1.0;
for x in 0..WIDTH {
let x2 = 2.0 * x as f64 / WIDTH as f64 - 1.0;
let activation = callback([x2 * scale, y2 * scale]);
let r = (sigmoid(activation.get(0).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
let g = (sigmoid(activation.get(1).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
let b = (sigmoid(activation.get(2).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
*image.get_pixel_mut(x, y) = image::Rgb([r, g, b]);
}
}
let config = Config {
use_kitty: false,
truecolor: true,
// absolute_offset: false,
..Default::default()
};
viuer::print(&image::DynamicImage::ImageRgb8(image), &config).unwrap();
}
fn one_hot(value: usize, categories: usize) -> DVector<f32> {
let mut res = DVector::from_element(categories, 0.0);
if value < categories {

@ -0,0 +1,144 @@
use nalgebra::DVector;
use rust_mnist::Mnist;
use neuramethyst::{
argmax, cycle_shuffling,
derivable::{
activation::{Linear, Logistic, Relu, Swish, Tanh},
loss::{CrossEntropy, Euclidean},
},
plot_losses,
prelude::*,
};
const TRAIN_SIZE: usize = 50000;
const TEST_SIZE: usize = 1000;
const WIDTH: usize = 28;
const HEIGHT: usize = 28;
const LATENT_SIZE: usize = 25;
pub fn main() {
let Mnist {
train_data: train_images,
train_labels,
test_data: test_images,
test_labels,
..
} = Mnist::new("data/");
let train_images = train_images
.into_iter()
.map(|raw| {
DVector::from_iterator(WIDTH * HEIGHT, raw.into_iter().map(|x| x as f32 / 255.0))
})
.take(TRAIN_SIZE);
let train_labels = train_labels
.into_iter()
.map(|x| one_hot(x as usize, 10))
.take(TRAIN_SIZE);
let test_images = test_images
.into_iter()
.map(|raw| {
DVector::from_iterator(WIDTH * HEIGHT, raw.into_iter().map(|x| x as f32 / 255.0))
})
.take(TEST_SIZE);
let test_labels = test_labels
.into_iter()
.map(|x| one_hot(x as usize, 10))
.take(TEST_SIZE);
let test_data = test_images
.clone()
.zip(test_images.clone())
.collect::<Vec<_>>();
// First, train an encoder-decoder network (unsupervised)
let mut network = neura_sequential![
neura_layer!("dense", 100).activation(Swish(Logistic)),
neura_layer!("dense", 50).activation(Swish(Logistic)),
neura_layer!("dense", LATENT_SIZE).activation(Tanh),
neura_layer!("dense", 50),
neura_layer!("dense", 100),
neura_layer!("dense", WIDTH * HEIGHT).activation(Relu),
]
.construct(NeuraShape::Vector(WIDTH * HEIGHT))
.unwrap();
let trainer = NeuraBatchedTrainer::with_epochs(0.03, 75, 512, TRAIN_SIZE);
// trainer.log_iterations = 1;
let losses = trainer.train(
&NeuraBackprop::new(Euclidean),
&mut network,
cycle_shuffling(
train_images.clone().zip(train_images.clone()),
rand::thread_rng(),
),
&test_data,
);
plot_losses(losses, 128, 48);
// Then, train a small network to decode the encoded data into the categories
let trimmed_network = network.clone().trim_tail().trim_tail().trim_tail();
let mut network = neura_sequential![
..trimmed_network.lock(),
neura_layer!("dense", LATENT_SIZE)
.activation(Tanh)
.construct(NeuraShape::Vector(LATENT_SIZE))
.unwrap(),
neura_layer!("dense", 10)
.activation(Linear)
.construct(NeuraShape::Vector(LATENT_SIZE))
.unwrap(),
neura_layer!("softmax")
];
let test_data = test_images
.clone()
.zip(test_labels.clone())
.collect::<Vec<_>>();
let trainer = NeuraBatchedTrainer::with_epochs(0.03, 20, 128, TRAIN_SIZE);
plot_losses(
trainer.train(
&NeuraBackprop::new(Euclidean),
&mut network,
cycle_shuffling(train_images.clone().zip(train_labels), rand::thread_rng()),
&test_data,
),
128,
48,
);
let mut correct = 0;
for (test_image, test_label) in test_images.zip(test_labels) {
let guess = network.eval(&test_image);
let guess = argmax(guess.as_slice());
let actual = argmax(test_label.as_slice());
if guess == actual {
correct += 1;
}
}
println!("");
println!(
"{} correct out of {}: {:.2}%",
correct,
TEST_SIZE,
(correct as f32 / TEST_SIZE as f32) * 100.0
);
}
fn one_hot(value: usize, categories: usize) -> DVector<f32> {
let mut res = DVector::from_element(categories, 0.0);
if value < categories {
res[value] = 1.0;
}
res
}

@ -0,0 +1,213 @@
use nalgebra::DVector;
use rand::Rng;
use rust_mnist::Mnist;
use std::io::Write;
use neuramethyst::{
cycle_shuffling,
derivable::{
activation::{Logistic, Relu, Swish},
loss::Euclidean,
regularize::NeuraL2,
},
plot_losses,
prelude::*,
};
const TRAIN_SIZE: usize = 50000;
const TEST_SIZE: usize = 1000;
const WIDTH: usize = 28;
const HEIGHT: usize = 28;
const REG_RATE: f32 = 0.003;
const EPOCHS: usize = 80;
// const BASE_NOISE: f32 = 0.05;
const NOISE_AMOUNT: f32 = 0.5;
const SHIFT_AMOUNT: i32 = 9;
pub fn main() {
let Mnist {
train_data: train_images,
train_labels,
test_data: test_images,
test_labels,
..
} = Mnist::new("data/");
let train_images = train_images
.into_iter()
.map(|raw| {
DVector::from_iterator(WIDTH * HEIGHT, raw.into_iter().map(|x| x as f32 / 255.0))
})
.take(TRAIN_SIZE);
let train_labels = train_labels
.into_iter()
.map(|x| one_hot(x as usize, 10))
.take(TRAIN_SIZE);
let test_images = test_images
.into_iter()
.map(|raw| {
DVector::from_iterator(WIDTH * HEIGHT, raw.into_iter().map(|x| x as f32 / 255.0))
})
.take(TEST_SIZE);
let test_labels = test_labels
.into_iter()
.map(|x| one_hot(x as usize, 10))
.take(TEST_SIZE);
let test_data: Vec<_> = augment_data(test_images.zip(test_labels)).collect();
let mut network = neura_residual![
<= 0, 1;
neura_layer!("isolate", WIDTH * HEIGHT, WIDTH * HEIGHT + 10) => 1, 3, 5, 7, 9, 10;
neura_layer!("isolate", 0, WIDTH * HEIGHT) => 0, 1, 3;
neura_layer!("dense", 100).regularization(NeuraL2(REG_RATE)).activation(Swish(Logistic)) => 0, 2;
neura_layer!("dropout", 0.5);
neura_layer!("dense", 50).regularization(NeuraL2(REG_RATE)).activation(Swish(Logistic)) => 0, 2, 4;
neura_layer!("dropout", 0.5);
neura_layer!("dense", 50).regularization(NeuraL2(REG_RATE)).activation(Swish(Logistic)) => 0, 2;
neura_layer!("dropout", 0.33);
neura_layer!("dense", 25).regularization(NeuraL2(REG_RATE)).activation(Swish(Logistic)) => 0, 2;
neura_layer!("dropout", 0.33);
neura_layer!("dense", 25).regularization(NeuraL2(REG_RATE)).activation(Swish(Logistic));
// neura_layer!("dropout", 0.33);
neura_layer!("dense", WIDTH * HEIGHT).activation(Relu);
]
.construct(NeuraShape::Vector(WIDTH * HEIGHT + 10))
.unwrap();
let trainer = NeuraBatchedTrainer::with_epochs(0.03, EPOCHS, 512, TRAIN_SIZE);
// trainer.log_iterations = 1;
let train_data = augment_data(cycle_shuffling(
train_images.clone().zip(train_labels.clone()),
rand::thread_rng(),
));
let losses = trainer.train(
&NeuraBackprop::new(Euclidean),
&mut network,
train_data,
&test_data,
);
plot_losses(losses, 128, 48);
loop {
let mut image = uniform_vector(WIDTH * HEIGHT + 10);
let mut buffer = String::new();
print!("> ");
std::io::stdout().flush().unwrap();
if let Err(_) = std::io::stdin().read_line(&mut buffer) {
break;
}
for i in 0..10 {
image[WIDTH * HEIGHT + i] = buffer
.chars()
.any(|c| c == char::from_digit(i as u32, 10).unwrap())
as u8 as f32;
}
for _iter in 0..5 {
let new_image = network.eval(&image);
neuramethyst::draw_neuron_activation(
|[x, y]| {
let x = ((x + 1.0) / 2.0 * WIDTH as f32) as usize;
let y = ((y + 1.0) / 2.0 * HEIGHT as f32) as usize;
let index = x + y * WIDTH;
vec![new_image[index]]
},
1.0,
WIDTH as u32,
HEIGHT as u32,
);
for i in 0..(WIDTH * HEIGHT) {
image[i] = new_image[i] * 0.6 + image[i] * 0.3;
}
std::thread::sleep(std::time::Duration::new(0, 100_000_000));
}
}
}
fn uniform_vector(length: usize) -> DVector<f32> {
let mut res = DVector::from_element(length, 0.0);
let mut rng = rand::thread_rng();
for i in 0..length {
res[i] = rng.gen();
}
res
}
fn one_hot(value: usize, categories: usize) -> DVector<f32> {
let mut res = DVector::from_element(categories, 0.0);
if value < categories {
res[value] = 1.0;
}
res
}
fn add_noise(mut image: DVector<f32>, rng: &mut impl Rng, amount: f32) -> DVector<f32> {
if amount <= 0.0 {
return image;
}
let uniform = rand::distributions::Uniform::new(0.0, amount);
for i in 0..image.len() {
let x = rng.sample(uniform);
image[i] = image[i] * (1.0 - x) + (1.0 - image[i]) * x;
}
image
}
fn shift(image: &DVector<f32>, dx: i32, dy: i32) -> DVector<f32> {
let mut res = DVector::from_element(image.len(), 0.0);
let width = WIDTH as i32;
let height = HEIGHT as i32;
for y in 0..height {
for x in 0..width {
let x2 = x + dx;
let y2 = y + dy;
if y2 < 0 || y2 >= height || x2 < 0 || x2 >= width {
continue;
}
res[(y2 * width + x2) as usize] = image[(y * width + x) as usize];
}
}
res
}
fn augment_data(
iter: impl Iterator<Item = (DVector<f32>, DVector<f32>)>,
) -> impl Iterator<Item = (DVector<f32>, DVector<f32>)> {
let mut rng = rand::thread_rng();
iter.map(move |(image, label)| {
let noise_amount = rng.gen_range(0.05..NOISE_AMOUNT);
let base_image = shift(
&image,
rng.gen_range(-SHIFT_AMOUNT..SHIFT_AMOUNT),
rng.gen_range(-SHIFT_AMOUNT..SHIFT_AMOUNT),
) * rng.gen_range(0.6..1.0);
// let base_image = add_noise(base_image, &mut rng, base_noise);
let noisy_image = add_noise(base_image.clone(), &mut rng, noise_amount);
(
DVector::from_iterator(
WIDTH * HEIGHT + 10,
noisy_image.iter().copied().chain(label.iter().copied()),
),
image,
)
})
}

@ -0,0 +1,161 @@
use nalgebra::{DVector, Scalar};
use super::*;
/// **Class invariant:** `start` and `end` are compatible shapes (same kind and number of dimensions), as checked by `NeuraShape::is_compatible`.
#[derive(Clone, Debug)]
pub struct NeuraIsolateLayer {
start: NeuraShape,
end: NeuraShape,
}
#[derive(Clone, Debug)]
pub enum NeuraIsolateLayerErr {
Incompatible {
start: NeuraShape,
end: NeuraShape,
input_shape: NeuraShape,
},
OutOfBound {
start: NeuraShape,
end: NeuraShape,
input_shape: NeuraShape,
},
OutOfOrder {
start: NeuraShape,
end: NeuraShape,
},
}
impl NeuraIsolateLayer {
pub fn new<T: Into<NeuraShape>>(start: T, end: T) -> Option<Self> {
let start = start.into();
let end = end.into();
if start.is_compatible(end) {
Some(Self { start, end })
} else {
None
}
}
}
impl NeuraShapedLayer for NeuraIsolateLayer {
fn output_shape(&self) -> NeuraShape {
self.end.sub(self.start).unwrap_or_else(|| unreachable!())
}
}
impl NeuraPartialLayer for NeuraIsolateLayer {
type Constructed = NeuraIsolateLayer;
type Err = NeuraIsolateLayerErr;
fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
use NeuraShape::*;
let start = self.start;
let end = self.end;
match (input_shape, start, end) {
(Vector(xi), Vector(xs), Vector(xe)) => {
if xs >= xe {
return Err(NeuraIsolateLayerErr::OutOfOrder { start, end });
}
if xs >= xi || xe > xi {
return Err(NeuraIsolateLayerErr::OutOfBound {
start,
end,
input_shape,
});
}
Ok(self)
}
(Matrix(_xi, _yi), Matrix(_xs, _ys), Matrix(_xe, _ye)) => unimplemented!(),
(Tensor(_xi, _yi, _zi), Tensor(_xs, _ys, _zs), Tensor(_xe, _ye, _ze)) => {
unimplemented!()
}
_ => Err(NeuraIsolateLayerErr::Incompatible {
start,
end,
input_shape,
}),
}
}
}
impl<F: Clone + Scalar> NeuraLayer<DVector<F>> for NeuraIsolateLayer {
type Output = DVector<F>;
fn eval(&self, input: &DVector<F>) -> Self::Output {
let (NeuraShape::Vector(start), NeuraShape::Vector(end)) = (self.start, self.end) else {
panic!("NeuraIsolateLayer expected a value of dimension {}, got a vector", self.start.dims());
};
DVector::from_iterator(end - start, input.iter().cloned().skip(start).take(end))
}
}
impl NeuraTrainableLayerBase for NeuraIsolateLayer {
type Gradient = ();
#[inline(always)]
fn default_gradient(&self) -> Self::Gradient {
()
}
#[inline(always)]
fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
// Noop
}
}
impl<F: Clone + Scalar> NeuraTrainableLayerEval<DVector<F>> for NeuraIsolateLayer {
type IntermediaryRepr = ();
fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
(self.eval(input), ())
}
}
impl<Input> NeuraTrainableLayerSelf<Input> for NeuraIsolateLayer
where
Self: NeuraTrainableLayerEval<Input>,
{
#[inline(always)]
fn regularize_layer(&self) -> Self::Gradient {
()
}
#[inline(always)]
fn get_gradient(
&self,
_input: &Input,
_intermediary: &Self::IntermediaryRepr,
_epsilon: &Self::Output,
) -> Self::Gradient {
()
}
}
impl<F: Clone + Scalar + Default> NeuraTrainableLayerBackprop<DVector<F>> for NeuraIsolateLayer {
fn backprop_layer(
&self,
input: &DVector<F>,
_intermediary: &Self::IntermediaryRepr,
epsilon: &Self::Output,
) -> DVector<F> {
let mut result = DVector::from_element(input.len(), F::default());
let NeuraShape::Vector(start) = self.start else {
unreachable!();
};
for i in 0..epsilon.len() {
result[start + i] = epsilon[i].clone();
}
result
}
}

@ -4,6 +4,7 @@ use self::lock::NeuraLockLayer;
pub mod dense;
pub mod dropout;
pub mod isolate;
pub mod lock;
pub mod normalize;
pub mod softmax;
@ -23,6 +24,55 @@ impl NeuraShape {
NeuraShape::Tensor(rows, columns, channels) => rows * columns * channels,
}
}
pub fn sub(&self, other: NeuraShape) -> Option<NeuraShape> {
use NeuraShape::*;
Some(match (other, self) {
(Vector(x1), Vector(x2)) => Vector(x2 - x1),
(Matrix(x1, y1), Matrix(x2, y2)) => Matrix(x2 - x1, y2 - y1),
(Tensor(x1, y1, z1), Tensor(x2, y2, z2)) => Tensor(x2 - x1, y2 - y1, z2 - z1),
_ => return None,
})
}
pub fn is_compatible(&self, other: NeuraShape) -> bool {
use NeuraShape::*;
matches!(
(self, other),
(Vector(_), Vector(_))
| (Matrix(_, _), Matrix(_, _))
| (Tensor(_, _, _), Tensor(_, _, _))
)
}
pub fn dims(&self) -> usize {
match self {
NeuraShape::Vector(_) => 1,
NeuraShape::Matrix(_, _) => 2,
NeuraShape::Tensor(_, _, _) => 3,
}
}
}
impl From<usize> for NeuraShape {
fn from(x: usize) -> Self {
NeuraShape::Vector(x)
}
}
impl From<(usize, usize)> for NeuraShape {
fn from((x, y): (usize, usize)) -> Self {
NeuraShape::Matrix(x, y)
}
}
impl From<(usize, usize, usize)> for NeuraShape {
fn from((x, y, z): (usize, usize, usize)) -> Self {
NeuraShape::Tensor(x, y, z)
}
}
pub trait NeuraLayer<Input> {
@ -217,4 +267,8 @@ macro_rules! neura_layer {
( "normalize" ) => { ( "normalize" ) => {
$crate::layer::normalize::NeuraNormalizeLayer::new() $crate::layer::normalize::NeuraNormalizeLayer::new()
}; };
( "isolate", $start:expr, $end:expr ) => {
$crate::layer::isolate::NeuraIsolateLayer::new($start, $end).unwrap()
};
}

@ -10,6 +10,9 @@ mod utils;
// TODO: move to a different file
pub use utils::{argmax, cycle_shuffling, one_hot, plot_losses};
#[cfg(feature = "visualization")]
pub use utils::draw_neuron_activation;
/// Common traits and structs that are useful to use this library.
/// All of these traits are prefixed with the word "neura" in some way,
/// so there should not be any conflicts when doing a wildcard import of `prelude`.

@ -1,6 +1,6 @@
use std::borrow::Cow;
- use crate::{network::*, utils::unwrap_or_clone};
+ use crate::network::*;
use super::*;

@ -97,6 +97,7 @@ pub(crate) fn uniform_vector(length: usize) -> nalgebra::DVector<f64> {
DVector::from_fn(length, |_, _| -> f64 { rng.gen() })
}
#[deprecated]
pub fn one_hot<const N: usize>(value: usize) -> NeuraVector<N, f64> {
let mut res = NeuraVector::default();
if value < N {
@ -154,3 +155,41 @@ pub(crate) fn unwrap_or_clone<T: Clone>(value: std::rc::Rc<T>) -> T {
// TODO: replace with Rc::unwrap_or_clone once https://github.com/rust-lang/rust/issues/93610 is closed
std::rc::Rc::try_unwrap(value).unwrap_or_else(|value| (*value).clone())
}
#[cfg(feature = "visualization")]
pub fn draw_neuron_activation<F: Fn([f32; 2]) -> Vec<f32>>(
callback: F,
scale: f32,
width: u32,
height: u32,
) {
use viuer::Config;
let mut image = image::RgbImage::new(width, height);
fn sigmoid(x: f32) -> f32 {
1.9 / (1.0 + (-x * 3.0).exp()) - 0.9
}
for y in 0..height {
let y2 = 2.0 * y as f32 / height as f32 - 1.0;
for x in 0..width {
let x2 = 2.0 * x as f32 / width as f32 - 1.0;
let activation = callback([x2 * scale, y2 * scale]);
let r = (sigmoid(activation.get(0).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
let g = (sigmoid(activation.get(1).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
let b = (sigmoid(activation.get(2).copied().unwrap_or(-1.0)) * 255.0).floor() as u8;
*image.get_pixel_mut(x, y) = image::Rgb([r, g, b]);
}
}
let config = Config {
use_kitty: false,
truecolor: true,
// absolute_offset: false,
..Default::default()
};
viuer::print(&image::DynamicImage::ImageRgb8(image), &config).unwrap();
}
