From ab0380cd790e21db89bc51f2a9bfd5547ce90c73 Mon Sep 17 00:00:00 2001
From: Adrien Burgun
Date: Thu, 21 Sep 2023 13:13:39 +0200
Subject: [PATCH] :sparkles: Implement basic precedence expression parser

---
 src/main.rs      |   2 -
 src/parse/mod.rs | 217 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 216 insertions(+), 3 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index ed1ffda..e7a11a9 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,3 @@
-
-
 fn main() {
     println!("Hello, world!");
 }
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 9b6d39c..56bdffd 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -18,6 +18,24 @@ pub enum Operator {
     // etc.
 }
 
+impl Operator {
+    /// Returns the binding strength of the operator: the higher the value,
+    /// the tighter the operator binds its operands.
+    ///
+    /// `parse_expression` relies on this ordering, so `*` and `/` must rank
+    /// above `+` and `-`, and comparisons must rank lowest.
+    fn precedence(self) -> u8 {
+        use Operator as O;
+        match self {
+            O::Mul | O::Div => 10,
+            O::Mod => 7,
+            O::RShift | O::LShift => 6,
+            O::Add | O::Sub => 5,
+            O::Eq | O::Neq | O::Gt | O::Lt | O::Gte | O::Lte => 0,
+        }
+    }
+}
+
 #[derive(PartialEq, Clone, Debug)]
 pub enum BasicToken {
     NewLine,
@@ -29,7 +47,7 @@ pub enum BasicToken {
     Goto,
     OpenParen,
     CloseParen,
-    Integer(u64),
+    Integer(i64),
     Float(f64),
     Name(String),
     String(String),
@@ -39,6 +57,12 @@ pub enum BasicToken {
 #[derive(PartialEq, Clone, Debug)]
 pub enum ParseError {
     InvalidToken(String),
+    /// A token was found where it is not allowed.
+    UnexpectedToken(BasicToken),
+    /// A required token (e.g. `Then` or `EndIf`) was never found.
+    MissingToken(BasicToken),
+    /// The input ended where an operand was expected.
+    ExpectedOperand,
 }
 
 pub fn tokenize(raw: &str) -> Result<Vec<BasicToken>, ParseError> {
@@ -150,6 +174,197 @@ pub fn tokenize(raw: &str) -> Result<Vec<BasicToken>, ParseError> {
     Ok(res)
 }
 
+/// An expression node of the AST.
+pub enum BasicAstExpression {
+    Integer(i64),
+    Float(f64),
+    Variable(String),
+    Binary(Operator, Box<BasicAstExpression>, Box<BasicAstExpression>),
+}
+
+/// A single statement of the AST.
+pub enum BasicAstOperation {
+    /// `Assign(variable, value)`
+    Assign(String, BasicAstExpression),
+    Jump(String),
+    /// `IfThenElse(condition, true_branch, false_branch)`
+    IfThenElse(BasicAstExpression, BasicAstBlock, BasicAstBlock),
+}
+
+/// An operation, together with the label that precedes it (if any).
+pub struct BasicAstInstruction {
+    pub label: Option<String>,
+    pub operation: BasicAstOperation,
+}
+
+/// A sequence of instructions.
+#[derive(Default)]
+pub struct BasicAstBlock {
+    pub instructions: Vec<BasicAstInstruction>,
+}
+
+/// Returns the index of the first token of `tokens` equal to `needle`,
+/// or `ParseError::MissingToken(needle)` if there is none.
+fn find_token_index(tokens: &[BasicToken], needle: BasicToken) -> Result<usize, ParseError> {
+    tokens
+        .iter()
+        .position(|token| *token == needle)
+        .ok_or(ParseError::MissingToken(needle))
+}
+
+/// Parses `tokens` as a single expression, using operator-precedence parsing.
+///
+/// Every non-newline token must belong to the expression: a leftover token is
+/// reported as `ParseError::UnexpectedToken`.
+fn parse_expression(mut tokens: &[BasicToken]) -> Result<BasicAstExpression, ParseError> {
+    /// Advances `tokens` by `by` tokens, skipping the leading newline tokens if present
+    fn advance(tokens: &mut &[BasicToken], by: usize) {
+        while let Some(BasicToken::NewLine) = tokens.first() {
+            *tokens = &(*tokens)[1..];
+        }
+        *tokens = &(*tokens)[by..];
+    }
+
+    /// Returns the first non-newline token in `tokens`
+    fn peek<'a>(tokens: &'a &[BasicToken]) -> Option<&'a BasicToken> {
+        tokens.iter().find(|t| !matches!(t, BasicToken::NewLine))
+    }
+
+    /// Parses a single expression item, skipping the newlines that precede it
+    fn parse_expression_item(tokens: &mut &[BasicToken]) -> Result<BasicAstExpression, ParseError> {
+        advance(tokens, 0);
+        match *tokens {
+            [BasicToken::Integer(int), ..] => {
+                advance(tokens, 1);
+                Ok(BasicAstExpression::Integer(*int))
+            },
+            [BasicToken::Float(float), ..] => {
+                advance(tokens, 1);
+                Ok(BasicAstExpression::Float(*float))
+            },
+            [BasicToken::Name(_fn_name), BasicToken::OpenParen, ..] => {
+                unimplemented!("Function calls are not yet supported");
+            },
+            [BasicToken::Name(name), ..] => {
+                advance(tokens, 1);
+                Ok(BasicAstExpression::Variable(name.clone()))
+            },
+            [] => Err(ParseError::ExpectedOperand),
+            _ => Err(ParseError::UnexpectedToken(tokens[0].clone())),
+        }
+    }
+
+    /// Given an lhs and a minimum precedence, eats as many binary operations as possible,
+    /// recursively calling itself when an operator with a higher precedence is encountered.
+    ///
+    /// See https://en.wikipedia.org/wiki/Operator-precedence_parser for more information
+    fn parse_expression_main(tokens: &mut &[BasicToken], lhs: BasicAstExpression, min_precedence: u8) -> Result<BasicAstExpression, ParseError> {
+        let mut ast = lhs;
+        while let Some(&BasicToken::Operator(operator)) = peek(tokens) {
+            if operator.precedence() < min_precedence {
+                break
+            }
+            advance(tokens, 1);
+            let mut rhs = parse_expression_item(tokens)?;
+            while let Some(&BasicToken::Operator(sub_operator)) = peek(tokens) {
+                // Only operators binding tighter than `operator` belong to `rhs`;
+                // without this `break` the loop would never terminate.
+                if sub_operator.precedence() <= operator.precedence() {
+                    break
+                }
+                rhs = parse_expression_main(tokens, rhs, operator.precedence() + 1)?;
+            }
+
+            ast = BasicAstExpression::Binary(operator, Box::new(ast), Box::new(rhs));
+        }
+
+        Ok(ast)
+    }
+
+    let lhs = parse_expression_item(&mut tokens)?;
+    let res = parse_expression_main(&mut tokens, lhs, 0)?;
+
+    // Report leftover tokens as an error instead of panicking on malformed input
+    if let Some(token) = peek(&tokens) {
+        return Err(ParseError::UnexpectedToken(token.clone()));
+    }
+
+    Ok(res)
+}
+
+/// Builds the AST of the program made of `tokens`.
+///
+/// Known limitation: an `If` is matched with the first `EndIf` that follows its `Then`,
+/// so nested `If` blocks are not supported yet.
+pub fn build_ast(mut tokens: &[BasicToken]) -> Result<BasicAstBlock, ParseError> {
+    let mut instructions = Vec::new();
+    let mut current_label: Option<String> = None;
+
+    while !tokens.is_empty() {
+        match &tokens[..] {
+            [BasicToken::NewLine, BasicToken::Integer(label), ..] => {
+                tokens = &tokens[2..];
+                current_label = Some(label.to_string());
+            }
+            // NOTE(review): this arm also matches a line that starts with a variable name
+            // (e.g. an assignment) - confirm how labels are told apart from statements
+            [BasicToken::NewLine, BasicToken::Name(label), ..] => {
+                tokens = &tokens[2..];
+                current_label = Some(label.clone());
+            }
+            [BasicToken::NewLine, ..] => {
+                tokens = &tokens[1..];
+                current_label = None;
+            }
+            [BasicToken::Name(variable_name), BasicToken::Assign, ..] => {
+                tokens = &tokens[2..];
+                // The assigned expression extends until the end of the line
+                let expression_end = tokens
+                    .iter()
+                    .position(|token| matches!(token, BasicToken::NewLine))
+                    .unwrap_or(tokens.len());
+                let expression = parse_expression(&tokens[..expression_end])?;
+                tokens = &tokens[expression_end..];
+                instructions.push(BasicAstInstruction {
+                    label: current_label.take(),
+                    operation: BasicAstOperation::Assign(variable_name.clone(), expression)
+                });
+            }
+            [BasicToken::If, ..] => {
+                tokens = &tokens[1..];
+                let then_index = find_token_index(tokens, BasicToken::Then)?;
+                // `EndIf` is looked for after `Then`, and `Else` between the two only,
+                // which keeps every slice below well-formed
+                let branches_start = then_index + 1;
+                let end_index = branches_start + find_token_index(&tokens[branches_start..], BasicToken::EndIf)?;
+
+                let condition = parse_expression(&tokens[..then_index])?;
+                let branches = &tokens[branches_start..end_index];
+                let (true_branch, false_branch) = match find_token_index(branches, BasicToken::Else) {
+                    Ok(else_index) => (
+                        build_ast(&branches[..else_index])?,
+                        build_ast(&branches[(else_index + 1)..])?,
+                    ),
+                    Err(_) => (build_ast(branches)?, BasicAstBlock::default()),
+                };
+
+                instructions.push(BasicAstInstruction {
+                    label: current_label.take(),
+                    operation: BasicAstOperation::IfThenElse(condition, true_branch, false_branch)
+                });
+
+                // Skip the body and the `EndIf` token itself
+                tokens = &tokens[(end_index + 1)..];
+            }
+            _ => {
+                return Err(ParseError::UnexpectedToken(tokens[0].clone()));
+            }
+        }
+    }
+
+    Ok(BasicAstBlock { instructions })
+}
+
 #[cfg(test)]
 mod test {
     use super::*;