parent
872cb3a6ce
commit
972b177767
@@ -0,0 +1,144 @@
use nalgebra::DVector;
use rust_mnist::Mnist;

use neuramethyst::{
    argmax, cycle_shuffling,
    derivable::{
        activation::{Linear, Logistic, Relu, Swish, Tanh},
        loss::{CrossEntropy, Euclidean},
    },
    plot_losses,
    prelude::*,
};

const TRAIN_SIZE: usize = 50000;
const TEST_SIZE: usize = 1000;
const WIDTH: usize = 28;
const HEIGHT: usize = 28;
const LATENT_SIZE: usize = 25;
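
// Train an autoencoder on MNIST, then reuse its frozen encoder
// as the front half of a small digit classifier.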
pub fn main() {
    let Mnist {
        train_data: train_images,
        train_labels,
        test_data: test_images,
        test_labels,
        ..
    } = Mnist::new("data/");

    // Normalize the raw 28×28 u8 images into f32 vectors with entries in [0, 1]
    let train_images = train_images
        .into_iter()
        .map(|raw| {
            DVector::from_iterator(WIDTH * HEIGHT, raw.into_iter().map(|x| x as f32 / 255.0))
        })
        .take(TRAIN_SIZE);
    let train_labels = train_labels
        .into_iter()
        .map(|x| one_hot(x as usize, 10))
        .take(TRAIN_SIZE);

    let test_images = test_images
        .into_iter()
        .map(|raw| {
            DVector::from_iterator(WIDTH * HEIGHT, raw.into_iter().map(|x| x as f32 / 255.0))
        })
        .take(TEST_SIZE);
    let test_labels = test_labels
        .into_iter()
        .map(|x| one_hot(x as usize, 10))
        .take(TEST_SIZE);

    // Autoencoder validation pairs: the input image is also the target
    let test_data = test_images
        .clone()
        .zip(test_images.clone())
        .collect::<Vec<_>>();

    // First, train an encoder-decoder network (unsupervised): the 25-unit Tanh
    // layer in the middle is the latent bottleneck, and the three layers after
    // it learn to reconstruct the 784-pixel input from that code.
    let mut network = neura_sequential![
        neura_layer!("dense", 100).activation(Swish(Logistic)),
        neura_layer!("dense", 50).activation(Swish(Logistic)),
        neura_layer!("dense", LATENT_SIZE).activation(Tanh),
        neura_layer!("dense", 50),
        neura_layer!("dense", 100),
        neura_layer!("dense", WIDTH * HEIGHT).activation(Relu),
    ]
    .construct(NeuraShape::Vector(WIDTH * HEIGHT))
    .unwrap();

    let trainer = NeuraBatchedTrainer::with_epochs(0.03, 75, 512, TRAIN_SIZE);
    // trainer.log_iterations = 1;

    // Train the autoencoder to reproduce its own input
    let losses = trainer.train(
        &NeuraBackprop::new(Euclidean),
        &mut network,
        cycle_shuffling(
            train_images.clone().zip(train_images.clone()),
            rand::thread_rng(),
        ),
        &test_data,
    );

    plot_losses(losses, 128, 48);

    // Then, train a small network to decode the encoded data into the categories.
    // Each trim_tail() removes the last layer, so three calls strip the decoder
    // half; lock() freezes the remaining encoder's weights.
    let trimmed_network = network.clone().trim_tail().trim_tail().trim_tail();

    let mut network = neura_sequential![
        ..trimmed_network.lock(),
        neura_layer!("dense", LATENT_SIZE)
            .activation(Tanh)
            .construct(NeuraShape::Vector(LATENT_SIZE))
            .unwrap(),
        neura_layer!("dense", 10)
            .activation(Linear)
            .construct(NeuraShape::Vector(LATENT_SIZE))
            .unwrap(),
        neura_layer!("softmax")
    ];
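
    // Classification pairs: (image, one-hot label)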
    let test_data = test_images
        .clone()
        .zip(test_labels.clone())
        .collect::<Vec<_>>();

    let trainer = NeuraBatchedTrainer::with_epochs(0.03, 20, 128, TRAIN_SIZE);

    plot_losses(
        trainer.train(
            &NeuraBackprop::new(Euclidean),
            &mut network,
            cycle_shuffling(train_images.clone().zip(train_labels), rand::thread_rng()),
            &test_data,
        ),
        128,
        48,
    );
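
    // Measure classification accuracy on the held-out test set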
    let mut correct = 0;
    for (test_image, test_label) in test_images.zip(test_labels) {
        let guess = network.eval(&test_image);
        let guess = argmax(guess.as_slice());
        let actual = argmax(test_label.as_slice());

        if guess == actual {
            correct += 1;
        }
    }

    println!();
    println!(
        "{} correct out of {}: {:.2}%",
        correct,
        TEST_SIZE,
        (correct as f32 / TEST_SIZE as f32) * 100.0
    );
}

fn one_hot(value: usize, categories: usize) -> DVector<f32> {
    let mut res = DVector::from_element(categories, 0.0);
    if value < categories {
        res[value] = 1.0;
    }
    res
}

@@ -0,0 +1,213 @@
use nalgebra::DVector;
use rand::Rng;
use rust_mnist::Mnist;
use std::io::Write;

use neuramethyst::{
    cycle_shuffling,
    derivable::{
        activation::{Logistic, Relu, Swish},
        loss::Euclidean,
        regularize::NeuraL2,
    },
    plot_losses,
    prelude::*,
};

const TRAIN_SIZE: usize = 50000;
const TEST_SIZE: usize = 1000;
const WIDTH: usize = 28;
const HEIGHT: usize = 28;
const REG_RATE: f32 = 0.003;
const EPOCHS: usize = 80;

// const BASE_NOISE: f32 = 0.05;
const NOISE_AMOUNT: f32 = 0.5;
const SHIFT_AMOUNT: i32 = 9;
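
// Denoising, label-conditioned autoencoder on MNIST: the input is a noisy,
// shifted image concatenated with its one-hot label, and the target is the
// clean image. After training, an interactive loop generates digits from noise.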
pub fn main() {
    let Mnist {
        train_data: train_images,
        train_labels,
        test_data: test_images,
        test_labels,
        ..
    } = Mnist::new("data/");

    let train_images = train_images
        .into_iter()
        .map(|raw| {
            DVector::from_iterator(WIDTH * HEIGHT, raw.into_iter().map(|x| x as f32 / 255.0))
        })
        .take(TRAIN_SIZE);
    let train_labels = train_labels
        .into_iter()
        .map(|x| one_hot(x as usize, 10))
        .take(TRAIN_SIZE);

    let test_images = test_images
        .into_iter()
        .map(|raw| {
            DVector::from_iterator(WIDTH * HEIGHT, raw.into_iter().map(|x| x as f32 / 255.0))
        })
        .take(TEST_SIZE);
    let test_labels = test_labels
        .into_iter()
        .map(|x| one_hot(x as usize, 10))
        .take(TEST_SIZE);

    let test_data: Vec<_> = augment_data(test_images.zip(test_labels)).collect();
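
    // Residual network: `<= 0, 1` feeds the raw input to the first two layers,
    // and `=> a, b, ...` forwards a layer's output to the layers that many
    // steps further down as skip connections (offset 0 being the next layer).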
    let mut network = neura_residual![
        <= 0, 1;
        // Isolate the one-hot label (the last 10 entries of the input)...
        neura_layer!("isolate", WIDTH * HEIGHT, WIDTH * HEIGHT + 10) => 1, 3, 5, 7, 9, 10;
        // ...and the image itself (the first WIDTH * HEIGHT entries)
        neura_layer!("isolate", 0, WIDTH * HEIGHT) => 0, 1, 3;
        neura_layer!("dense", 100).regularization(NeuraL2(REG_RATE)).activation(Swish(Logistic)) => 0, 2;
        neura_layer!("dropout", 0.5);
        neura_layer!("dense", 50).regularization(NeuraL2(REG_RATE)).activation(Swish(Logistic)) => 0, 2, 4;
        neura_layer!("dropout", 0.5);
        neura_layer!("dense", 50).regularization(NeuraL2(REG_RATE)).activation(Swish(Logistic)) => 0, 2;
        neura_layer!("dropout", 0.33);
        neura_layer!("dense", 25).regularization(NeuraL2(REG_RATE)).activation(Swish(Logistic)) => 0, 2;
        neura_layer!("dropout", 0.33);
        neura_layer!("dense", 25).regularization(NeuraL2(REG_RATE)).activation(Swish(Logistic));
        // neura_layer!("dropout", 0.33);
        neura_layer!("dense", WIDTH * HEIGHT).activation(Relu);
    ]
    .construct(NeuraShape::Vector(WIDTH * HEIGHT + 10))
    .unwrap();

    let trainer = NeuraBatchedTrainer::with_epochs(0.03, EPOCHS, 512, TRAIN_SIZE);
    // trainer.log_iterations = 1;
    let train_data = augment_data(cycle_shuffling(
        train_images.clone().zip(train_labels.clone()),
        rand::thread_rng(),
    ));

    let losses = trainer.train(
        &NeuraBackprop::new(Euclidean),
        &mut network,
        train_data,
        &test_data,
    );
    plot_losses(losses, 128, 48);
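
    // Interactive sampling: type one or more digits, and the network iteratively
    // refines a random-noise image conditioned on those digits.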
    loop {
        let mut image = uniform_vector(WIDTH * HEIGHT + 10);
        let mut buffer = String::new();
        print!("> ");
        std::io::stdout().flush().unwrap();
        if std::io::stdin().read_line(&mut buffer).is_err() {
            break;
        }

        // Set the conditioning entries for the digits the user typed
        for i in 0..10 {
            image[WIDTH * HEIGHT + i] = buffer
                .chars()
                .any(|c| c == char::from_digit(i as u32, 10).unwrap())
                as u8 as f32;
        }

        for _iter in 0..5 {
            let new_image = network.eval(&image);

            neuramethyst::draw_neuron_activation(
                |[x, y]| {
                    let x = ((x + 1.0) / 2.0 * WIDTH as f32) as usize;
                    let y = ((y + 1.0) / 2.0 * HEIGHT as f32) as usize;

                    let index = x + y * WIDTH;

                    vec![new_image[index]]
                },
                1.0,
                WIDTH as u32,
                HEIGHT as u32,
            );

            // Blend the network's output back into the input and iterate
            for i in 0..(WIDTH * HEIGHT) {
                image[i] = new_image[i] * 0.6 + image[i] * 0.3;
            }

            std::thread::sleep(std::time::Duration::new(0, 100_000_000));
        }
    }
}

fn uniform_vector(length: usize) -> DVector<f32> {
    let mut res = DVector::from_element(length, 0.0);
    let mut rng = rand::thread_rng();

    for i in 0..length {
        res[i] = rng.gen();
    }

    res
}

fn one_hot(value: usize, categories: usize) -> DVector<f32> {
    let mut res = DVector::from_element(categories, 0.0);
    if value < categories {
        res[value] = 1.0;
    }
    res
}
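
/// Blends each pixel toward its inverse by a random factor `x` in `[0, amount)`:
/// `p ← p * (1 - x) + (1 - p) * x`.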
fn add_noise(mut image: DVector<f32>, rng: &mut impl Rng, amount: f32) -> DVector<f32> {
    if amount <= 0.0 {
        return image;
    }

    let uniform = rand::distributions::Uniform::new(0.0, amount);

    for i in 0..image.len() {
        let x = rng.sample(uniform);
        image[i] = image[i] * (1.0 - x) + (1.0 - image[i]) * x;
    }

    image
}
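
/// Translates the image by `(dx, dy)`, filling the vacated pixels with zeros.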
fn shift(image: &DVector<f32>, dx: i32, dy: i32) -> DVector<f32> {
    let mut res = DVector::from_element(image.len(), 0.0);
    let width = WIDTH as i32;
    let height = HEIGHT as i32;

    for y in 0..height {
        for x in 0..width {
            let x2 = x + dx;
            let y2 = y + dy;
            if y2 < 0 || y2 >= height || x2 < 0 || x2 >= width {
                continue;
            }
            res[(y2 * width + x2) as usize] = image[(y * width + x) as usize];
        }
    }

    res
}
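
/// Maps `(image, label)` pairs to training pairs whose input is a randomly
/// shifted, darkened and noised copy of the image concatenated with the label,
/// and whose target is the original image.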
fn augment_data(
    iter: impl Iterator<Item = (DVector<f32>, DVector<f32>)>,
) -> impl Iterator<Item = (DVector<f32>, DVector<f32>)> {
    let mut rng = rand::thread_rng();
    iter.map(move |(image, label)| {
        let noise_amount = rng.gen_range(0.05..NOISE_AMOUNT);
        let base_image = shift(
            &image,
            rng.gen_range(-SHIFT_AMOUNT..SHIFT_AMOUNT),
            rng.gen_range(-SHIFT_AMOUNT..SHIFT_AMOUNT),
        ) * rng.gen_range(0.6..1.0);
        // let base_image = add_noise(base_image, &mut rng, base_noise);

        let noisy_image = add_noise(base_image.clone(), &mut rng, noise_amount);

        (
            DVector::from_iterator(
                WIDTH * HEIGHT + 10,
                noisy_image.iter().copied().chain(label.iter().copied()),
            ),
            image,
        )
    })
}

@@ -0,0 +1,161 @@
use nalgebra::{DVector, Scalar};

use super::*;

/// **Class invariant:** `start` and `end` are compatible shapes,
/// as checked by [`NeuraIsolateLayer::new`].
#[derive(Clone, Debug)]
pub struct NeuraIsolateLayer {
    start: NeuraShape,
    end: NeuraShape,
}

#[derive(Clone, Debug)]
pub enum NeuraIsolateLayerErr {
    Incompatible {
        start: NeuraShape,
        end: NeuraShape,
        input_shape: NeuraShape,
    },
    OutOfBound {
        start: NeuraShape,
        end: NeuraShape,
        input_shape: NeuraShape,
    },
    OutOfOrder {
        start: NeuraShape,
        end: NeuraShape,
    },
}

impl NeuraIsolateLayer {
    pub fn new<T: Into<NeuraShape>>(start: T, end: T) -> Option<Self> {
        let start = start.into();
        let end = end.into();

        if start.is_compatible(end) {
            Some(Self { start, end })
        } else {
            None
        }
    }
}
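
// Used through `neura_layer!("isolate", start, end)`, e.g.
// `neura_layer!("isolate", 0, WIDTH * HEIGHT)` keeps entries 0..WIDTH * HEIGHT
// of the input (see the examples above).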

impl NeuraShapedLayer for NeuraIsolateLayer {
    fn output_shape(&self) -> NeuraShape {
        self.end.sub(self.start).unwrap_or_else(|| unreachable!())
    }
}

impl NeuraPartialLayer for NeuraIsolateLayer {
    type Constructed = NeuraIsolateLayer;
    type Err = NeuraIsolateLayerErr;

    fn construct(self, input_shape: NeuraShape) -> Result<Self::Constructed, Self::Err> {
        use NeuraShape::*;
        let start = self.start;
        let end = self.end;

        match (input_shape, start, end) {
            (Vector(xi), Vector(xs), Vector(xe)) => {
                if xs >= xe {
                    return Err(NeuraIsolateLayerErr::OutOfOrder { start, end });
                }

                if xs >= xi || xe > xi {
                    return Err(NeuraIsolateLayerErr::OutOfBound {
                        start,
                        end,
                        input_shape,
                    });
                }

                Ok(self)
            }

            (Matrix(_xi, _yi), Matrix(_xs, _ys), Matrix(_xe, _ye)) => unimplemented!(),
            (Tensor(_xi, _yi, _zi), Tensor(_xs, _ys, _zs), Tensor(_xe, _ye, _ze)) => {
                unimplemented!()
            }

            _ => Err(NeuraIsolateLayerErr::Incompatible {
                start,
                end,
                input_shape,
            }),
        }
    }
}

impl<F: Clone + Scalar> NeuraLayer<DVector<F>> for NeuraIsolateLayer {
    type Output = DVector<F>;

    fn eval(&self, input: &DVector<F>) -> Self::Output {
        let (NeuraShape::Vector(start), NeuraShape::Vector(end)) = (self.start, self.end) else {
            panic!(
                "NeuraIsolateLayer expected a {}-dimensional input, got a vector",
                self.start.dims()
            );
        };

        DVector::from_iterator(
            end - start,
            input.iter().cloned().skip(start).take(end - start),
        )
    }
}
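
// The isolate layer has no trainable parameters, so its gradient type is the
// unit type and every gradient-related method is a no-op.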
impl NeuraTrainableLayerBase for NeuraIsolateLayer {
    type Gradient = ();

    #[inline(always)]
    fn default_gradient(&self) -> Self::Gradient {
        ()
    }

    #[inline(always)]
    fn apply_gradient(&mut self, _gradient: &Self::Gradient) {
        // Noop
    }
}

impl<F: Clone + Scalar> NeuraTrainableLayerEval<DVector<F>> for NeuraIsolateLayer {
    type IntermediaryRepr = ();

    fn eval_training(&self, input: &DVector<F>) -> (Self::Output, Self::IntermediaryRepr) {
        (self.eval(input), ())
    }
}

impl<Input> NeuraTrainableLayerSelf<Input> for NeuraIsolateLayer
where
    Self: NeuraTrainableLayerEval<Input>,
{
    #[inline(always)]
    fn regularize_layer(&self) -> Self::Gradient {
        ()
    }

    #[inline(always)]
    fn get_gradient(
        &self,
        _input: &Input,
        _intermediary: &Self::IntermediaryRepr,
        _epsilon: &Self::Output,
    ) -> Self::Gradient {
        ()
    }
}
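
// Backprop scatters `epsilon` back into the `start..end` slice of a zero vector
// the size of the original input; gradients outside the isolated range are zero.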
impl<F: Clone + Scalar + Default> NeuraTrainableLayerBackprop<DVector<F>> for NeuraIsolateLayer {
    fn backprop_layer(
        &self,
        input: &DVector<F>,
        _intermediary: &Self::IntermediaryRepr,
        epsilon: &Self::Output,
    ) -> DVector<F> {
        let mut result = DVector::from_element(input.len(), F::default());
        let NeuraShape::Vector(start) = self.start else {
            unreachable!();
        };

        for i in 0..epsilon.len() {
            result[start + i] = epsilon[i].clone();
        }

        result
    }
}