From f47b89b20b80633eace2fa91073968fafcc7a71e Mon Sep 17 00:00:00 2001 From: Adrien Burgun Date: Thu, 21 Sep 2023 14:32:18 +0200 Subject: [PATCH] :sparkles: Working and tested expression parser --- src/cursor.rs | 112 ++++++++++++++++++ src/lib.rs | 1 + src/parse/mod.rs | 292 +++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 355 insertions(+), 50 deletions(-) create mode 100644 src/cursor.rs diff --git a/src/cursor.rs b/src/cursor.rs new file mode 100644 index 0000000..f14499a --- /dev/null +++ b/src/cursor.rs @@ -0,0 +1,112 @@ +#[derive(Clone, Debug)] +pub struct Cursor<'a, T> { + data: &'a [T], + offset: usize, +} + +impl<'a, T> PartialEq for Cursor<'a, T> { + fn eq(&self, other: &Self) -> bool { + self.data.as_ptr() == other.data.as_ptr() + && self.data.len() == other.data.len() + && self.offset == other.offset + } +} + +impl<'a, T: 'a> std::ops::Index for Cursor<'a, T> { + type Output = T; + + fn index(&self, index: usize) -> &Self::Output { + &self.data[self.offset + index] + } +} + +macro_rules! impl_range { + ( $range:ty ) => { + impl<'a, T> std::ops::Index<$range> for Cursor<'a, T> { + type Output = [T]; + + fn index(&self, range: $range) -> &Self::Output { + &self.data[self.offset..][range] + } + } + }; +} + +impl_range!(std::ops::Range); +impl_range!(std::ops::RangeFrom); +impl_range!(std::ops::RangeFull); +impl_range!(std::ops::RangeInclusive); +impl_range!(std::ops::RangeTo); +impl_range!(std::ops::RangeToInclusive); + +impl<'a, T> From> for &'a [T] { + fn from(cursor: Cursor<'a, T>) -> Self { + &cursor.data[cursor.offset..] + } +} + +impl<'a, T> From<&'_ Cursor<'a, T>> for &'a [T] { + fn from(cursor: &'_ Cursor<'a, T>) -> Self { + &cursor.data[cursor.offset..] + } +} + +impl<'a, T> From<&'a [T]> for Cursor<'a, T> { + fn from(data: &'a [T]) -> Self { + Self { data, offset: 0 } + } +} + +impl<'a, T, const N: usize> From<&'a [T; N]> for Cursor<'a, T> { + fn from(data: &'a [T; N]) -> Self { + Self { data, offset: 0 } + } +} + +impl<'a, T> std::ops::Deref for Cursor<'a, T> { + type Target = [T]; + + fn deref(&self) -> &Self::Target { + &self.data[self.offset..] + } +} + +impl<'a, T> Cursor<'a, T> { + pub fn new(data: &'a [T], offset: usize) -> Self { + Self { data, offset } + } + + pub fn take(&mut self, amount: usize) -> &'a [T] { + let head = self.peek(amount); + self.offset += amount; + head + } + + pub fn peek(&self, amount: usize) -> &'a [T] { + if self.offset >= self.data.len() { + return &[]; + } + let max = (self.offset + amount).min(self.data.len()); + + &self.data[self.offset..max] + } + + pub fn get(&self, at: usize) -> Option<&'a T> { + self.data.get(self.offset + at) + } + + pub fn iter<'b>(&'b self) -> impl Iterator + 'b { + self.data[self.offset..].iter() + } + + pub fn len(&self) -> usize { + self.data.len().saturating_sub(self.offset) + } + + pub fn range>(&self, range: R) -> Self { + Self { + data: &self.data[self.offset..][range], + offset: 0, + } + } +} diff --git a/src/lib.rs b/src/lib.rs index ea86848..1b52c78 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,2 @@ +pub mod cursor; pub mod parse; diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 56bdffd..4ced546 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -1,3 +1,4 @@ +use crate::cursor::Cursor; use regex::Regex; #[derive(PartialEq, Eq, Clone, Copy, Debug)] @@ -22,11 +23,11 @@ impl Operator { fn precedence(self) -> u8 { use Operator as O; match self { - O::Add | O::Sub => 5, + O::Add | O::Sub => 3, O::RShift | O::LShift => 4, - O::Mod => 3, - O::Mul | O::Div => 0, - O::Eq | O::Neq | O::Gt | O::Lt | O::Gte | O::Lte => 10, + O::Mod => 5, + O::Mul | O::Div => 10, + O::Eq | O::Neq | O::Gt | O::Lt | O::Gte | O::Lte => 0, } } } @@ -57,6 +58,7 @@ pub enum ParseError { ExpectedOperand, } +/// Transforms a raw string into a sequence of `BasicToken`s pub fn tokenize(raw: &str) -> Result, ParseError> { macro_rules! match_token { ( $line:expr, $res:expr $(;)? ) => {}; @@ -121,6 +123,7 @@ pub fn tokenize(raw: &str) -> Result, ParseError> { res.push(BasicToken::NewLine); } while line.len() > 0 { + // Main match clause for tokens match_token!(line, res; match_space => (), match_let => (), @@ -158,6 +161,7 @@ pub fn tokenize(raw: &str) -> Result, ParseError> { }), match_string(with_quotes) => (BasicToken::String(with_quotes[1..with_quotes.len() - 1].to_string())), ); + // If this line is reached, then none of the matches above matched return Err(ParseError::InvalidToken(line.to_string())); } @@ -166,6 +170,7 @@ pub fn tokenize(raw: &str) -> Result, ParseError> { Ok(res) } +#[derive(Clone, Debug, PartialEq)] pub enum BasicAstExpression { Integer(i64), Float(f64), @@ -173,22 +178,25 @@ pub enum BasicAstExpression { Binary(Operator, Box, Box), } +#[derive(Clone, Debug, PartialEq)] pub enum BasicAstOperation { Assign(String, BasicAstExpression), Jump(String), IfThenElse(BasicAstExpression, BasicAstBlock, BasicAstBlock), } +#[derive(Clone, Debug, PartialEq)] pub struct BasicAstInstruction { pub label: Option, pub operation: BasicAstOperation, } -#[derive(Default)] +#[derive(Clone, Debug, PartialEq, Default)] pub struct BasicAstBlock { pub instructions: Vec, } +/// Returns the index of the first token matching `needle` fn find_token_index(tokens: &[BasicToken], needle: BasicToken) -> Result { tokens .iter() @@ -198,40 +206,43 @@ fn find_token_index(tokens: &[BasicToken], needle: BasicToken) -> Result Result { - /// Advances `tokens` by `by` tokens, skipping the first newline tokens if present - fn advance(tokens: &mut &[BasicToken], by: usize) { - while let Some(BasicToken::NewLine) = tokens.get(0) { - *tokens = &(*tokens)[1..]; - } - *tokens = &(*tokens)[by..]; - } - +fn parse_expression(tokens: &mut Cursor<'_, BasicToken>) -> Result { /// Returns the first non-newline token in `tokens` - fn peek<'a>(tokens: &'a &[BasicToken]) -> Option<&'a BasicToken> { + fn peek<'a>(tokens: &'a [BasicToken]) -> Option<&'a BasicToken> { tokens.iter().find(|t| !matches!(t, BasicToken::NewLine)) } /// Parses a single expression item - fn parse_expression_item(tokens: &mut &[BasicToken]) -> Result { - match *tokens { + fn parse_expression_item( + tokens: &mut Cursor<'_, BasicToken>, + ) -> Result { + match tokens.peek(2) { [BasicToken::Integer(int), ..] => { - advance(tokens, 1); + tokens.take(1); Ok(BasicAstExpression::Integer(*int)) - }, + } [BasicToken::Float(float), ..] => { - advance(tokens, 1); + tokens.take(1); Ok(BasicAstExpression::Float(*float)) - }, + } [BasicToken::Name(_fn_name), BasicToken::OpenParen, ..] => { unimplemented!("Function calls are not yet supported"); - }, + } [BasicToken::Name(name), ..] => { - advance(tokens, 1); + tokens.take(1); Ok(BasicAstExpression::Variable(name.clone())) - }, + } + [BasicToken::OpenParen, ..] => { + tokens.take(1); + let res = parse_expression(tokens)?; + if let Some(BasicToken::CloseParen) = tokens.take(1).get(0) { + Ok(res) + } else { + Err(ParseError::MissingToken(BasicToken::CloseParen)) + } + } + [first, ..] => Err(ParseError::UnexpectedToken(first.clone())), [] => Err(ParseError::ExpectedOperand), - _ => Err(ParseError::UnexpectedToken(tokens[0].clone())), } } @@ -239,17 +250,23 @@ fn parse_expression(mut tokens: &[BasicToken]) -> Result Result { + fn parse_expression_main( + tokens: &mut Cursor<'_, BasicToken>, + lhs: BasicAstExpression, + min_precedence: u8, + ) -> Result { let mut ast = lhs; while let Some(&BasicToken::Operator(operator)) = peek(tokens) { if operator.precedence() < min_precedence { - break + break; } - advance(tokens, 1); + tokens.take(1); let mut rhs = parse_expression_item(tokens)?; while let Some(&BasicToken::Operator(sub_operator)) = peek(tokens) { if sub_operator.precedence() > operator.precedence() { rhs = parse_expression_main(tokens, rhs, operator.precedence() + 1)?; + } else { + break; } } @@ -260,65 +277,70 @@ fn parse_expression(mut tokens: &[BasicToken]) -> Result Result { +pub fn build_ast(tokens: &[BasicToken]) -> Result { + let mut tokens = Cursor::from(tokens); let mut instructions = Vec::new(); let mut current_label: Option = None; while tokens.len() > 0 { - match &tokens[..] { + match tokens.peek(2) { [BasicToken::NewLine, BasicToken::Integer(label), ..] => { - tokens = &tokens[2..]; + tokens.take(2); current_label = Some(label.to_string()); } [BasicToken::NewLine, BasicToken::Name(label), ..] => { - tokens = &tokens[2..]; + tokens.take(2); current_label = Some(label.clone()); } [BasicToken::NewLine, ..] => { - tokens = &tokens[1..]; + tokens.take(1); current_label = None; } [BasicToken::Name(variable_name), BasicToken::Assign, ..] => { - tokens = &tokens[2..]; - let expression = parse_expression(tokens)?; - // TODO: advance `tokens` + tokens.take(2); + let expression = parse_expression(&mut tokens)?; instructions.push(BasicAstInstruction { label: current_label.take(), - operation: BasicAstOperation::Assign(variable_name.clone(), expression) + operation: BasicAstOperation::Assign(variable_name.clone(), expression), }); } [BasicToken::If, ..] => { - tokens = &tokens[1..]; - let then_index = find_token_index(tokens, BasicToken::Then)?; - let end_index = find_token_index(tokens, BasicToken::EndIf)?; + tokens.take(1); + let then_index = find_token_index(&tokens, BasicToken::Then)?; + let end_index = find_token_index(&tokens, BasicToken::EndIf)?; - let condition = parse_expression(&tokens[0..then_index])?; - if let Ok(else_index) = find_token_index(tokens, BasicToken::Else) { + let condition = parse_expression(&mut tokens.range(0..then_index))?; + if let Ok(else_index) = find_token_index(&tokens, BasicToken::Else) { let true_branch = build_ast(&tokens[(then_index + 1)..else_index])?; let false_branch = build_ast(&tokens[(else_index + 1)..end_index])?; instructions.push(BasicAstInstruction { label: current_label.take(), - operation: BasicAstOperation::IfThenElse(condition, true_branch, false_branch) + operation: BasicAstOperation::IfThenElse( + condition, + true_branch, + false_branch, + ), }); } else { let true_branch = build_ast(&tokens[(then_index + 1)..end_index])?; instructions.push(BasicAstInstruction { label: current_label.take(), - operation: BasicAstOperation::IfThenElse(condition, true_branch, BasicAstBlock::default()) + operation: BasicAstOperation::IfThenElse( + condition, + true_branch, + BasicAstBlock::default(), + ), }); } - tokens = &tokens[end_index..]; + tokens.take(end_index); } _ => { return Err(ParseError::UnexpectedToken(tokens[0].clone())); @@ -396,4 +418,174 @@ mod test { ], ); } + + #[test] + fn test_operator_precedence() { + fn test_parse(list: [BasicToken; N]) -> BasicAstExpression { + parse_expression(&mut Cursor::from(&list)).unwrap() + } + + fn test_err(list: [BasicToken; N]) -> ParseError { + parse_expression(&mut Cursor::from(&list)).err().unwrap() + } + + assert_eq!( + test_parse([BasicToken::Name(String::from("hello"))]), + BasicAstExpression::Variable(String::from("hello")) + ); + + assert_eq!( + test_parse([ + BasicToken::Name(String::from("hello")), + BasicToken::Name(String::from("world")), + ]), + BasicAstExpression::Variable(String::from("hello")) + ); + + assert_eq!( + test_parse([ + BasicToken::Name(String::from("hello")), + BasicToken::Operator(Operator::Add), + BasicToken::Integer(1), + ]), + BasicAstExpression::Binary( + Operator::Add, + Box::new(BasicAstExpression::Variable(String::from("hello"))), + Box::new(BasicAstExpression::Integer(1)), + ) + ); + + assert_eq!( + test_parse([ + BasicToken::Name(String::from("hello")), + BasicToken::Operator(Operator::Add), + BasicToken::Integer(2), + BasicToken::Operator(Operator::Mul), + BasicToken::Name(String::from("world")), + ]), + BasicAstExpression::Binary( + Operator::Add, + Box::new(BasicAstExpression::Variable(String::from("hello"))), + Box::new(BasicAstExpression::Binary( + Operator::Mul, + Box::new(BasicAstExpression::Integer(2)), + Box::new(BasicAstExpression::Variable(String::from("world"))), + )), + ) + ); + + assert_eq!( + test_parse([ + BasicToken::Name(String::from("hello")), + BasicToken::Operator(Operator::Mul), + BasicToken::Integer(2), + BasicToken::Operator(Operator::Add), + BasicToken::Name(String::from("world")), + ]), + BasicAstExpression::Binary( + Operator::Add, + Box::new(BasicAstExpression::Binary( + Operator::Mul, + Box::new(BasicAstExpression::Variable(String::from("hello"))), + Box::new(BasicAstExpression::Integer(2)), + )), + Box::new(BasicAstExpression::Variable(String::from("world"))), + ) + ); + + assert_eq!( + test_parse([ + BasicToken::Name(String::from("hello")), + BasicToken::Operator(Operator::Mul), + BasicToken::OpenParen, + BasicToken::Integer(2), + BasicToken::Operator(Operator::Add), + BasicToken::Name(String::from("world")), + BasicToken::CloseParen, + ]), + BasicAstExpression::Binary( + Operator::Mul, + Box::new(BasicAstExpression::Variable(String::from("hello"))), + Box::new(BasicAstExpression::Binary( + Operator::Add, + Box::new(BasicAstExpression::Integer(2)), + Box::new(BasicAstExpression::Variable(String::from("world"))), + )), + ) + ); + + assert_eq!( + test_parse([ + BasicToken::Name(String::from("hello")), + BasicToken::Operator(Operator::Add), + BasicToken::OpenParen, + BasicToken::Name(String::from("world")), + BasicToken::Operator(Operator::Mul), + BasicToken::Integer(2), + BasicToken::CloseParen, + ]), + BasicAstExpression::Binary( + Operator::Add, + Box::new(BasicAstExpression::Variable(String::from("hello"))), + Box::new(BasicAstExpression::Binary( + Operator::Mul, + Box::new(BasicAstExpression::Variable(String::from("world"))), + Box::new(BasicAstExpression::Integer(2)), + )), + ) + ); + + assert_eq!( + test_err([ + BasicToken::Name(String::from("hello")), + BasicToken::Operator(Operator::Add), + ]), + ParseError::ExpectedOperand + ); + + assert_eq!( + test_err([ + BasicToken::Name(String::from("hello")), + BasicToken::Operator(Operator::Add), + BasicToken::OpenParen, + BasicToken::Name(String::from("world")), + BasicToken::Operator(Operator::Mul), + BasicToken::Integer(2), + ]), + ParseError::MissingToken(BasicToken::CloseParen) + ); + + assert_eq!( + test_err([ + BasicToken::Name(String::from("hello")), + BasicToken::Operator(Operator::Add), + BasicToken::Operator(Operator::Mul), + ]), + ParseError::UnexpectedToken(BasicToken::Operator(Operator::Mul)) + ); + + assert!(matches!( + test_err([ + BasicToken::Name(String::from("hello")), + BasicToken::Operator(Operator::Add), + BasicToken::OpenParen, + ]), + ParseError::ExpectedOperand | ParseError::MissingToken(BasicToken::CloseParen) + )); + + assert!(matches!( + test_err([ + BasicToken::Name(String::from("hello")), + BasicToken::Operator(Operator::Add), + BasicToken::OpenParen, + BasicToken::CloseParen + ]), + ParseError::ExpectedOperand | ParseError::UnexpectedToken(BasicToken::CloseParen) + )); + + assert_eq!( + test_err([BasicToken::Operator(Operator::Add), BasicToken::Integer(2)]), + ParseError::UnexpectedToken(BasicToken::Operator(Operator::Add)) + ); + } }