Working and tested expression parser

main
Shad Amethyst 1 year ago
parent ab0380cd79
commit f47b89b20b

@ -0,0 +1,112 @@
/// A read position over a borrowed slice.
///
/// `data` is the whole underlying slice and `offset` is the index of the
/// first element that has not been consumed yet.
#[derive(Debug)]
pub struct Cursor<'a, T> {
    /// The complete backing slice; never re-sliced as the cursor advances.
    data: &'a [T],
    /// Index into `data` of the next element to be read.
    offset: usize,
}

// `Clone` is written by hand on purpose: `#[derive(Clone)]` would add a
// `T: Clone` bound, but cloning a cursor only copies a shared reference and
// an index, so it is valid for every `T`.
impl<'a, T> Clone for Cursor<'a, T> {
    fn clone(&self) -> Self {
        Self {
            data: self.data,
            offset: self.offset,
        }
    }
}
/// Two cursors are equal when they view the *same* backing slice (same start
/// address and same length) and sit at the same offset. Element values are
/// never compared, so no `T: PartialEq` bound is needed.
impl<'a, T> PartialEq for Cursor<'a, T> {
    fn eq(&self, other: &Self) -> bool {
        if self.offset != other.offset {
            return false;
        }
        // Comparing the slice (fat) pointers checks both address and length.
        std::ptr::eq(self.data, other.data)
    }
}
/// Indexes relative to the cursor position: `cursor[0]` is the next
/// unconsumed element.
///
/// Panics if the resulting position is outside the backing slice.
impl<'a, T: 'a> std::ops::Index<usize> for Cursor<'a, T> {
    type Output = T;

    fn index(&self, index: usize) -> &Self::Output {
        // Translate the cursor-relative index into one for the backing slice.
        let absolute = self.offset + index;
        &self.data[absolute]
    }
}
macro_rules! impl_range {
( $range:ty ) => {
impl<'a, T> std::ops::Index<$range> for Cursor<'a, T> {
type Output = [T];
fn index(&self, range: $range) -> &Self::Output {
&self.data[self.offset..][range]
}
}
};
}
impl_range!(std::ops::Range<usize>);
impl_range!(std::ops::RangeFrom<usize>);
impl_range!(std::ops::RangeFull);
impl_range!(std::ops::RangeInclusive<usize>);
impl_range!(std::ops::RangeTo<usize>);
impl_range!(std::ops::RangeToInclusive<usize>);
impl<'a, T> From<Cursor<'a, T>> for &'a [T] {
fn from(cursor: Cursor<'a, T>) -> Self {
&cursor.data[cursor.offset..]
}
}
impl<'a, T> From<&'_ Cursor<'a, T>> for &'a [T] {
fn from(cursor: &'_ Cursor<'a, T>) -> Self {
&cursor.data[cursor.offset..]
}
}
impl<'a, T> From<&'a [T]> for Cursor<'a, T> {
fn from(data: &'a [T]) -> Self {
Self { data, offset: 0 }
}
}
impl<'a, T, const N: usize> From<&'a [T; N]> for Cursor<'a, T> {
fn from(data: &'a [T; N]) -> Self {
Self { data, offset: 0 }
}
}
/// Dereferences to the elements that have not been consumed yet, which lets a
/// `&Cursor<T>` be passed wherever a `&[T]` is expected.
impl<'a, T> std::ops::Deref for Cursor<'a, T> {
    type Target = [T];

    fn deref(&self) -> &Self::Target {
        // A cursor whose offset lies past the end of `data` is treated as
        // empty rather than panicking: deref happens implicitly (method calls,
        // coercions), so a panic here would surface far from its cause.
        self.data.get(self.offset..).unwrap_or(&[])
    }
}
impl<'a, T> Cursor<'a, T> {
    /// Creates a cursor over `data` whose next element is `data[offset]`.
    ///
    /// `offset` is not validated; the methods of this `impl` treat an offset
    /// at or past `data.len()` as an empty cursor.
    pub fn new(data: &'a [T], offset: usize) -> Self {
        Self { data, offset }
    }

    /// Returns the elements that have not been consumed yet, or an empty
    /// slice when the offset lies past the end of the data.
    fn remaining(&self) -> &'a [T] {
        self.data.get(self.offset..).unwrap_or(&[])
    }

    /// Returns the next `amount` elements and advances past them.
    ///
    /// If fewer than `amount` elements remain, all of them are returned and
    /// the cursor stops exactly at the end of the data.
    pub fn take(&mut self, amount: usize) -> &'a [T] {
        let head = self.peek(amount);
        // Advance by what was actually handed out, not by what was asked for:
        // overshooting would leave `offset > data.len()`, a state in which
        // slicing `data[offset..]` panics.
        self.offset += head.len();
        head
    }

    /// Returns the next `amount` elements without advancing.
    ///
    /// The result is shorter than `amount` (possibly empty) when fewer
    /// elements remain.
    pub fn peek(&self, amount: usize) -> &'a [T] {
        let rest = self.remaining();
        // Clamping against the remaining length avoids computing
        // `offset + amount`, which could overflow for a huge `amount`.
        &rest[..amount.min(rest.len())]
    }

    /// Returns the element `at` positions after the cursor, or `None` if
    /// that position is outside the data.
    pub fn get(&self, at: usize) -> Option<&'a T> {
        // An index that does not even fit in `usize` is certainly out of range.
        let index = self.offset.checked_add(at)?;
        self.data.get(index)
    }

    /// Iterates over the elements that have not been consumed yet.
    pub fn iter<'b>(&'b self) -> impl Iterator<Item = &'a T> + 'b {
        self.remaining().iter()
    }

    /// Number of elements that have not been consumed yet.
    pub fn len(&self) -> usize {
        self.data.len().saturating_sub(self.offset)
    }

    /// Returns `true` when no elements remain.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Creates a new cursor, positioned at its start, over the sub-range
    /// `range` of the remaining elements.
    ///
    /// # Panics
    ///
    /// Panics if `range` is out of bounds for the remaining elements.
    pub fn range<R: std::slice::SliceIndex<[T], Output = [T]>>(&self, range: R) -> Self {
        Self {
            data: &self.remaining()[range],
            offset: 0,
        }
    }
}

@ -1 +1,2 @@
pub mod cursor;
pub mod parse; pub mod parse;

@ -1,3 +1,4 @@
use crate::cursor::Cursor;
use regex::Regex; use regex::Regex;
#[derive(PartialEq, Eq, Clone, Copy, Debug)] #[derive(PartialEq, Eq, Clone, Copy, Debug)]
@ -22,11 +23,11 @@ impl Operator {
fn precedence(self) -> u8 { fn precedence(self) -> u8 {
use Operator as O; use Operator as O;
match self { match self {
O::Add | O::Sub => 5, O::Add | O::Sub => 3,
O::RShift | O::LShift => 4, O::RShift | O::LShift => 4,
O::Mod => 3, O::Mod => 5,
O::Mul | O::Div => 0, O::Mul | O::Div => 10,
O::Eq | O::Neq | O::Gt | O::Lt | O::Gte | O::Lte => 10, O::Eq | O::Neq | O::Gt | O::Lt | O::Gte | O::Lte => 0,
} }
} }
} }
@ -57,6 +58,7 @@ pub enum ParseError {
ExpectedOperand, ExpectedOperand,
} }
/// Transforms a raw string into a sequence of `BasicToken`s
pub fn tokenize(raw: &str) -> Result<Vec<BasicToken>, ParseError> { pub fn tokenize(raw: &str) -> Result<Vec<BasicToken>, ParseError> {
macro_rules! match_token { macro_rules! match_token {
( $line:expr, $res:expr $(;)? ) => {}; ( $line:expr, $res:expr $(;)? ) => {};
@ -121,6 +123,7 @@ pub fn tokenize(raw: &str) -> Result<Vec<BasicToken>, ParseError> {
res.push(BasicToken::NewLine); res.push(BasicToken::NewLine);
} }
while line.len() > 0 { while line.len() > 0 {
// Main match clause for tokens
match_token!(line, res; match_token!(line, res;
match_space => (), match_space => (),
match_let => (), match_let => (),
@ -158,6 +161,7 @@ pub fn tokenize(raw: &str) -> Result<Vec<BasicToken>, ParseError> {
}), }),
match_string(with_quotes) => (BasicToken::String(with_quotes[1..with_quotes.len() - 1].to_string())), match_string(with_quotes) => (BasicToken::String(with_quotes[1..with_quotes.len() - 1].to_string())),
); );
// If this line is reached, then none of the matches above matched // If this line is reached, then none of the matches above matched
return Err(ParseError::InvalidToken(line.to_string())); return Err(ParseError::InvalidToken(line.to_string()));
} }
@ -166,6 +170,7 @@ pub fn tokenize(raw: &str) -> Result<Vec<BasicToken>, ParseError> {
Ok(res) Ok(res)
} }
#[derive(Clone, Debug, PartialEq)]
pub enum BasicAstExpression { pub enum BasicAstExpression {
Integer(i64), Integer(i64),
Float(f64), Float(f64),
@ -173,22 +178,25 @@ pub enum BasicAstExpression {
Binary(Operator, Box<BasicAstExpression>, Box<BasicAstExpression>), Binary(Operator, Box<BasicAstExpression>, Box<BasicAstExpression>),
} }
#[derive(Clone, Debug, PartialEq)]
pub enum BasicAstOperation { pub enum BasicAstOperation {
Assign(String, BasicAstExpression), Assign(String, BasicAstExpression),
Jump(String), Jump(String),
IfThenElse(BasicAstExpression, BasicAstBlock, BasicAstBlock), IfThenElse(BasicAstExpression, BasicAstBlock, BasicAstBlock),
} }
#[derive(Clone, Debug, PartialEq)]
pub struct BasicAstInstruction { pub struct BasicAstInstruction {
pub label: Option<String>, pub label: Option<String>,
pub operation: BasicAstOperation, pub operation: BasicAstOperation,
} }
#[derive(Default)] #[derive(Clone, Debug, PartialEq, Default)]
pub struct BasicAstBlock { pub struct BasicAstBlock {
pub instructions: Vec<BasicAstInstruction>, pub instructions: Vec<BasicAstInstruction>,
} }
/// Returns the index of the first token matching `needle`
fn find_token_index(tokens: &[BasicToken], needle: BasicToken) -> Result<usize, ParseError> { fn find_token_index(tokens: &[BasicToken], needle: BasicToken) -> Result<usize, ParseError> {
tokens tokens
.iter() .iter()
@ -198,40 +206,43 @@ fn find_token_index(tokens: &[BasicToken], needle: BasicToken) -> Result<usize,
.ok_or(ParseError::MissingToken(needle)) .ok_or(ParseError::MissingToken(needle))
} }
fn parse_expression(mut tokens: &[BasicToken]) -> Result<BasicAstExpression, ParseError> { fn parse_expression(tokens: &mut Cursor<'_, BasicToken>) -> Result<BasicAstExpression, ParseError> {
/// Advances `tokens` by `by` tokens, skipping the first newline tokens if present
fn advance(tokens: &mut &[BasicToken], by: usize) {
while let Some(BasicToken::NewLine) = tokens.get(0) {
*tokens = &(*tokens)[1..];
}
*tokens = &(*tokens)[by..];
}
/// Returns the first non-newline token in `tokens` /// Returns the first non-newline token in `tokens`
fn peek<'a>(tokens: &'a &[BasicToken]) -> Option<&'a BasicToken> { fn peek<'a>(tokens: &'a [BasicToken]) -> Option<&'a BasicToken> {
tokens.iter().find(|t| !matches!(t, BasicToken::NewLine)) tokens.iter().find(|t| !matches!(t, BasicToken::NewLine))
} }
/// Parses a single expression item /// Parses a single expression item
fn parse_expression_item(tokens: &mut &[BasicToken]) -> Result<BasicAstExpression, ParseError> { fn parse_expression_item(
match *tokens { tokens: &mut Cursor<'_, BasicToken>,
) -> Result<BasicAstExpression, ParseError> {
match tokens.peek(2) {
[BasicToken::Integer(int), ..] => { [BasicToken::Integer(int), ..] => {
advance(tokens, 1); tokens.take(1);
Ok(BasicAstExpression::Integer(*int)) Ok(BasicAstExpression::Integer(*int))
}, }
[BasicToken::Float(float), ..] => { [BasicToken::Float(float), ..] => {
advance(tokens, 1); tokens.take(1);
Ok(BasicAstExpression::Float(*float)) Ok(BasicAstExpression::Float(*float))
}, }
[BasicToken::Name(_fn_name), BasicToken::OpenParen, ..] => { [BasicToken::Name(_fn_name), BasicToken::OpenParen, ..] => {
unimplemented!("Function calls are not yet supported"); unimplemented!("Function calls are not yet supported");
}, }
[BasicToken::Name(name), ..] => { [BasicToken::Name(name), ..] => {
advance(tokens, 1); tokens.take(1);
Ok(BasicAstExpression::Variable(name.clone())) Ok(BasicAstExpression::Variable(name.clone()))
}, }
[BasicToken::OpenParen, ..] => {
tokens.take(1);
let res = parse_expression(tokens)?;
if let Some(BasicToken::CloseParen) = tokens.take(1).get(0) {
Ok(res)
} else {
Err(ParseError::MissingToken(BasicToken::CloseParen))
}
}
[first, ..] => Err(ParseError::UnexpectedToken(first.clone())),
[] => Err(ParseError::ExpectedOperand), [] => Err(ParseError::ExpectedOperand),
_ => Err(ParseError::UnexpectedToken(tokens[0].clone())),
} }
} }
@ -239,17 +250,23 @@ fn parse_expression(mut tokens: &[BasicToken]) -> Result<BasicAstExpression, Par
/// recursively calling itself when an operator with a higher precedence is encountered. /// recursively calling itself when an operator with a higher precedence is encountered.
/// ///
/// See https://en.wikipedia.org/wiki/Operator-precedence_parser for more information /// See https://en.wikipedia.org/wiki/Operator-precedence_parser for more information
fn parse_expression_main(tokens: &mut &[BasicToken], lhs: BasicAstExpression, min_precedence: u8) -> Result<BasicAstExpression, ParseError> { fn parse_expression_main(
tokens: &mut Cursor<'_, BasicToken>,
lhs: BasicAstExpression,
min_precedence: u8,
) -> Result<BasicAstExpression, ParseError> {
let mut ast = lhs; let mut ast = lhs;
while let Some(&BasicToken::Operator(operator)) = peek(tokens) { while let Some(&BasicToken::Operator(operator)) = peek(tokens) {
if operator.precedence() < min_precedence { if operator.precedence() < min_precedence {
break break;
} }
advance(tokens, 1); tokens.take(1);
let mut rhs = parse_expression_item(tokens)?; let mut rhs = parse_expression_item(tokens)?;
while let Some(&BasicToken::Operator(sub_operator)) = peek(tokens) { while let Some(&BasicToken::Operator(sub_operator)) = peek(tokens) {
if sub_operator.precedence() > operator.precedence() { if sub_operator.precedence() > operator.precedence() {
rhs = parse_expression_main(tokens, rhs, operator.precedence() + 1)?; rhs = parse_expression_main(tokens, rhs, operator.precedence() + 1)?;
} else {
break;
} }
} }
@ -260,65 +277,70 @@ fn parse_expression(mut tokens: &[BasicToken]) -> Result<BasicAstExpression, Par
} }
// Remove starting newlines // Remove starting newlines
let lhs = parse_expression_item(&mut tokens)?; let lhs = parse_expression_item(tokens)?;
advance(&mut tokens, 1); let res = parse_expression_main(tokens, lhs, 0)?;
let res = parse_expression_main(&mut tokens, lhs, 0)?;
assert_eq!(tokens, []);
Ok(res) Ok(res)
} }
pub fn build_ast(mut tokens: &[BasicToken]) -> Result<BasicAstBlock, ParseError> { pub fn build_ast(tokens: &[BasicToken]) -> Result<BasicAstBlock, ParseError> {
let mut tokens = Cursor::from(tokens);
let mut instructions = Vec::new(); let mut instructions = Vec::new();
let mut current_label: Option<String> = None; let mut current_label: Option<String> = None;
while tokens.len() > 0 { while tokens.len() > 0 {
match &tokens[..] { match tokens.peek(2) {
[BasicToken::NewLine, BasicToken::Integer(label), ..] => { [BasicToken::NewLine, BasicToken::Integer(label), ..] => {
tokens = &tokens[2..]; tokens.take(2);
current_label = Some(label.to_string()); current_label = Some(label.to_string());
} }
[BasicToken::NewLine, BasicToken::Name(label), ..] => { [BasicToken::NewLine, BasicToken::Name(label), ..] => {
tokens = &tokens[2..]; tokens.take(2);
current_label = Some(label.clone()); current_label = Some(label.clone());
} }
[BasicToken::NewLine, ..] => { [BasicToken::NewLine, ..] => {
tokens = &tokens[1..]; tokens.take(1);
current_label = None; current_label = None;
} }
[BasicToken::Name(variable_name), BasicToken::Assign, ..] => { [BasicToken::Name(variable_name), BasicToken::Assign, ..] => {
tokens = &tokens[2..]; tokens.take(2);
let expression = parse_expression(tokens)?; let expression = parse_expression(&mut tokens)?;
// TODO: advance `tokens`
instructions.push(BasicAstInstruction { instructions.push(BasicAstInstruction {
label: current_label.take(), label: current_label.take(),
operation: BasicAstOperation::Assign(variable_name.clone(), expression) operation: BasicAstOperation::Assign(variable_name.clone(), expression),
}); });
} }
[BasicToken::If, ..] => { [BasicToken::If, ..] => {
tokens = &tokens[1..]; tokens.take(1);
let then_index = find_token_index(tokens, BasicToken::Then)?; let then_index = find_token_index(&tokens, BasicToken::Then)?;
let end_index = find_token_index(tokens, BasicToken::EndIf)?; let end_index = find_token_index(&tokens, BasicToken::EndIf)?;
let condition = parse_expression(&tokens[0..then_index])?; let condition = parse_expression(&mut tokens.range(0..then_index))?;
if let Ok(else_index) = find_token_index(tokens, BasicToken::Else) { if let Ok(else_index) = find_token_index(&tokens, BasicToken::Else) {
let true_branch = build_ast(&tokens[(then_index + 1)..else_index])?; let true_branch = build_ast(&tokens[(then_index + 1)..else_index])?;
let false_branch = build_ast(&tokens[(else_index + 1)..end_index])?; let false_branch = build_ast(&tokens[(else_index + 1)..end_index])?;
instructions.push(BasicAstInstruction { instructions.push(BasicAstInstruction {
label: current_label.take(), label: current_label.take(),
operation: BasicAstOperation::IfThenElse(condition, true_branch, false_branch) operation: BasicAstOperation::IfThenElse(
condition,
true_branch,
false_branch,
),
}); });
} else { } else {
let true_branch = build_ast(&tokens[(then_index + 1)..end_index])?; let true_branch = build_ast(&tokens[(then_index + 1)..end_index])?;
instructions.push(BasicAstInstruction { instructions.push(BasicAstInstruction {
label: current_label.take(), label: current_label.take(),
operation: BasicAstOperation::IfThenElse(condition, true_branch, BasicAstBlock::default()) operation: BasicAstOperation::IfThenElse(
condition,
true_branch,
BasicAstBlock::default(),
),
}); });
} }
tokens = &tokens[end_index..]; tokens.take(end_index);
} }
_ => { _ => {
return Err(ParseError::UnexpectedToken(tokens[0].clone())); return Err(ParseError::UnexpectedToken(tokens[0].clone()));
@ -396,4 +418,174 @@ mod test {
], ],
); );
} }
#[test]
fn test_operator_precedence() {
    // --- Token shorthands -------------------------------------------------
    fn name(ident: &str) -> BasicToken {
        BasicToken::Name(String::from(ident))
    }
    fn op(operator: Operator) -> BasicToken {
        BasicToken::Operator(operator)
    }

    // --- Expected-AST shorthands ------------------------------------------
    fn variable(ident: &str) -> BasicAstExpression {
        BasicAstExpression::Variable(String::from(ident))
    }
    fn binary(
        operator: Operator,
        lhs: BasicAstExpression,
        rhs: BasicAstExpression,
    ) -> BasicAstExpression {
        BasicAstExpression::Binary(operator, Box::new(lhs), Box::new(rhs))
    }

    // Parses `input` and panics if parsing fails.
    fn parse_ok<const N: usize>(input: [BasicToken; N]) -> BasicAstExpression {
        let mut cursor = Cursor::from(&input);
        parse_expression(&mut cursor).unwrap()
    }
    // Parses `input` and panics if parsing succeeds.
    fn parse_err<const N: usize>(input: [BasicToken; N]) -> ParseError {
        let mut cursor = Cursor::from(&input);
        parse_expression(&mut cursor).err().unwrap()
    }

    // A lone operand.
    assert_eq!(parse_ok([name("hello")]), variable("hello"));

    // A second operand with no operator in between is left unparsed.
    assert_eq!(parse_ok([name("hello"), name("world")]), variable("hello"));

    // hello + 1
    assert_eq!(
        parse_ok([name("hello"), op(Operator::Add), BasicToken::Integer(1)]),
        binary(
            Operator::Add,
            variable("hello"),
            BasicAstExpression::Integer(1),
        )
    );

    // hello + 2 * world  =>  hello + (2 * world)
    assert_eq!(
        parse_ok([
            name("hello"),
            op(Operator::Add),
            BasicToken::Integer(2),
            op(Operator::Mul),
            name("world"),
        ]),
        binary(
            Operator::Add,
            variable("hello"),
            binary(
                Operator::Mul,
                BasicAstExpression::Integer(2),
                variable("world"),
            ),
        )
    );

    // hello * 2 + world  =>  (hello * 2) + world
    assert_eq!(
        parse_ok([
            name("hello"),
            op(Operator::Mul),
            BasicToken::Integer(2),
            op(Operator::Add),
            name("world"),
        ]),
        binary(
            Operator::Add,
            binary(
                Operator::Mul,
                variable("hello"),
                BasicAstExpression::Integer(2),
            ),
            variable("world"),
        )
    );

    // hello * (2 + world): parentheses override precedence.
    assert_eq!(
        parse_ok([
            name("hello"),
            op(Operator::Mul),
            BasicToken::OpenParen,
            BasicToken::Integer(2),
            op(Operator::Add),
            name("world"),
            BasicToken::CloseParen,
        ]),
        binary(
            Operator::Mul,
            variable("hello"),
            binary(
                Operator::Add,
                BasicAstExpression::Integer(2),
                variable("world"),
            ),
        )
    );

    // hello + (world * 2): parentheses that match the natural precedence.
    assert_eq!(
        parse_ok([
            name("hello"),
            op(Operator::Add),
            BasicToken::OpenParen,
            name("world"),
            op(Operator::Mul),
            BasicToken::Integer(2),
            BasicToken::CloseParen,
        ]),
        binary(
            Operator::Add,
            variable("hello"),
            binary(
                Operator::Mul,
                variable("world"),
                BasicAstExpression::Integer(2),
            ),
        )
    );

    // Trailing operator with nothing after it.
    assert_eq!(
        parse_err([name("hello"), op(Operator::Add)]),
        ParseError::ExpectedOperand
    );

    // Unclosed parenthesis.
    assert_eq!(
        parse_err([
            name("hello"),
            op(Operator::Add),
            BasicToken::OpenParen,
            name("world"),
            op(Operator::Mul),
            BasicToken::Integer(2),
        ]),
        ParseError::MissingToken(BasicToken::CloseParen)
    );

    // Two operators in a row.
    assert_eq!(
        parse_err([name("hello"), op(Operator::Add), op(Operator::Mul)]),
        ParseError::UnexpectedToken(BasicToken::Operator(Operator::Mul))
    );

    // Input ends right after an opening parenthesis; either error is accepted.
    assert!(matches!(
        parse_err([name("hello"), op(Operator::Add), BasicToken::OpenParen]),
        ParseError::ExpectedOperand | ParseError::MissingToken(BasicToken::CloseParen)
    ));

    // Empty parentheses; either error is accepted.
    assert!(matches!(
        parse_err([
            name("hello"),
            op(Operator::Add),
            BasicToken::OpenParen,
            BasicToken::CloseParen,
        ]),
        ParseError::ExpectedOperand | ParseError::UnexpectedToken(BasicToken::CloseParen)
    ));

    // Expression starting with an operator.
    assert_eq!(
        parse_err([op(Operator::Add), BasicToken::Integer(2)]),
        ParseError::UnexpectedToken(BasicToken::Operator(Operator::Add))
    );
}
} }

Loading…
Cancel
Save