From e823b5f807761c7b7f49683f27f0395c46707a4b Mon Sep 17 00:00:00 2001 From: Adrien Burgun Date: Wed, 4 Oct 2023 21:11:21 +0200 Subject: [PATCH] :sparkles: Pretty errors --- src/config.rs | 57 +++++++++---- src/cursor.rs | 10 +++ src/lib.rs | 1 + src/main.rs | 8 +- src/parse/ast.rs | 189 ++++++++++++++++++++++++++---------------- src/parse/mod.rs | 79 +++++++++++++++++- src/parse/test.rs | 50 +++++++---- src/parse/tokenize.rs | 33 +++++--- src/repr/mod.rs | 1 + src/repr/position.rs | 124 +++++++++++++++++++++++++++ 10 files changed, 432 insertions(+), 120 deletions(-) create mode 100644 src/repr/position.rs diff --git a/src/config.rs b/src/config.rs index c6778d6..9d5d617 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,8 +1,9 @@ use std::collections::HashMap; use crate::{ - parse::ParseError, + parse::{ParseError, ParseErrorKind}, repr::basic::{BasicAstExpression, BasicAstInstruction}, + repr::position::Position, }; pub struct Config { @@ -11,7 +12,7 @@ pub struct Config { /// Used for functions like `print_flush_world` pub special_functions: HashMap< String, - Box) -> Result>, + Box, Position) -> Result>, >, } @@ -32,14 +33,17 @@ impl Default for Config { let mut special_functions: HashMap< String, - Box) -> Result>, + Box, Position) -> Result>, > = HashMap::new(); special_functions.insert( String::from("print_flush_global"), - Box::new(|arguments: Vec| { + Box::new(|arguments: Vec, position| { let BasicAstExpression::Variable(buffer) = &arguments[0] else { - return Err(ParseError::InvalidArgument(arguments[0].clone())); + return Err(ParseError::new( + ParseErrorKind::InvalidArgument(arguments[0].clone()), + position, + )); }; let expected_length = match buffer.as_str() { @@ -47,14 +51,22 @@ impl Default for Config { "mission" => 1, "announce" => 2, "toast" => 2, - _ => return Err(ParseError::InvalidArgument(arguments[0].clone())), + _ => { + return Err(ParseError::new( + ParseErrorKind::InvalidArgument(arguments[0].clone()), + position, + )) + } }; if arguments.len() != expected_length { - return Err(ParseError::InvalidArgumentCount( - String::from("print_flush_global"), - expected_length, - arguments.len(), + return Err(ParseError::new( + ParseErrorKind::InvalidArgumentCount( + String::from("print_flush_global"), + expected_length, + arguments.len(), + ), + position, )); } @@ -67,9 +79,12 @@ impl Default for Config { special_functions.insert( String::from("control"), - Box::new(|arguments| { + Box::new(|arguments, position| { let BasicAstExpression::Variable(buffer) = &arguments[0] else { - return Err(ParseError::InvalidArgument(arguments[0].clone())); + return Err(ParseError::new( + ParseErrorKind::InvalidArgument(arguments[0].clone()), + position, + )); }; let expected_length = match buffer.as_str() { @@ -78,14 +93,22 @@ impl Default for Config { "shootp" => 4, "config" => 3, "color" => 3, - _ => return Err(ParseError::InvalidArgument(arguments[0].clone())), + _ => { + return Err(ParseError::new( + ParseErrorKind::InvalidArgument(arguments[0].clone()), + position, + )) + } }; if arguments.len() != expected_length { - return Err(ParseError::InvalidArgumentCount( - String::from("control"), - expected_length, - arguments.len(), + return Err(ParseError::new( + ParseErrorKind::InvalidArgumentCount( + String::from("control"), + expected_length, + arguments.len(), + ), + position, )); } diff --git a/src/cursor.rs b/src/cursor.rs index 382ad5f..6aab5b6 100644 --- a/src/cursor.rs +++ b/src/cursor.rs @@ -113,4 +113,14 @@ impl<'a, T> Cursor<'a, T> { offset: 0, } } + + /// Returns the un-offset-ed underlying data + pub fn full_data(&self) -> &'a [T] { + self.data + } + + /// Returns the offset + pub fn offset(&self) -> usize { + self.offset + } } diff --git a/src/lib.rs b/src/lib.rs index 4098c9e..c1db9a8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,4 +11,5 @@ pub mod prelude { pub use crate::repr::basic::*; pub use crate::repr::mlog::*; pub use crate::repr::operator::*; + pub use crate::repr::position::*; } diff --git a/src/main.rs b/src/main.rs index adc0768..fb2b25f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -28,8 +28,12 @@ fn main() { let source = std::fs::read_to_string(&args.input).expect("Couldn't read input file"); let config = Config::default(); - let tokens = tokenize(&source).unwrap(); - let parsed = build_ast(&tokens, &config).unwrap(); + let tokens = tokenize(&source).unwrap_or_else(|err| { + err.display_panic(&source); + }); + let parsed = build_ast(&tokens, &config).unwrap_or_else(|err| { + err.display_panic(&source); + }); let transformed = translate_ast(&parsed, &mut Namer::default(), &config); let optimized = if opt_level == OptLevel::Conservative { diff --git a/src/parse/ast.rs b/src/parse/ast.rs index 33531c3..deecca7 100644 --- a/src/parse/ast.rs +++ b/src/parse/ast.rs @@ -33,7 +33,10 @@ macro_rules! pop_context { } } -pub fn build_ast(tokens: &[BasicToken], config: &Config) -> Result { +pub fn build_ast( + tokens: &[(BasicToken, Position)], + config: &Config, +) -> Result { enum Context { Main, If(BasicAstExpression), @@ -58,7 +61,11 @@ pub fn build_ast(tokens: &[BasicToken], config: &Config) -> Result { tokens.take(2); instructions.push(BasicAstInstruction::JumpLabel(label.to_string())); @@ -92,7 +99,7 @@ pub fn build_ast(tokens: &[BasicToken], config: &Config) -> Result { - return Err(ParseError::UnexpectedToken(BasicToken::Else)); + return Err(ParseError::unexpected_token(BasicToken::Else, position)); } } } @@ -115,7 +122,7 @@ pub fn build_ast(tokens: &[BasicToken], config: &Config) -> Result { - return Err(ParseError::UnexpectedToken(BasicToken::EndIf)); + return Err(ParseError::unexpected_token(BasicToken::EndIf, position)); } }); } @@ -130,7 +137,7 @@ pub fn build_ast(tokens: &[BasicToken], config: &Config) -> Result Result { if *variable != expected_variable { - return Err(ParseError::WrongForVariable( - expected_variable, - variable.clone(), + return Err(ParseError::new( + ParseErrorKind::WrongForVariable( + expected_variable, + variable.clone(), + ), + last_position(&tokens), )); } @@ -163,7 +173,7 @@ pub fn build_ast(tokens: &[BasicToken], config: &Config) -> Result { - return Err(ParseError::UnexpectedToken(BasicToken::Next)); + return Err(ParseError::unexpected_token(BasicToken::Next, position)); } }); } @@ -196,7 +206,7 @@ pub fn build_ast(tokens: &[BasicToken], config: &Config) -> Result { - return Err(ParseError::UnexpectedToken(BasicToken::Wend)); + return Err(ParseError::unexpected_token(BasicToken::Wend, position)); } }); } @@ -210,10 +220,10 @@ pub fn build_ast(tokens: &[BasicToken], config: &Config) -> Result { - return Err(ParseError::UnexpectedToken(BasicToken::While)); + return Err(ParseError::unexpected_token(BasicToken::While, last_position(&tokens))); }, _ => { - return Err(ParseError::UnexpectedToken(BasicToken::Loop)); + return Err(ParseError::unexpected_token(BasicToken::Loop, position)); } }); } @@ -225,10 +235,10 @@ pub fn build_ast(tokens: &[BasicToken], config: &Config) -> Result { - return Err(ParseError::MissingToken(BasicToken::While)); + return Err(ParseError::missing_token(BasicToken::While, position.at_end())); }, _ => { - return Err(ParseError::UnexpectedToken(BasicToken::Wend)); + return Err(ParseError::unexpected_token(BasicToken::Wend, position)); } }); } @@ -272,12 +282,12 @@ pub fn build_ast(tokens: &[BasicToken], config: &Config) -> Result Result {} - Some(other) => { - return Err(ParseError::UnexpectedToken(other.clone())); + Some((BasicToken::NewLine, _)) | None => {} + Some((other, position)) => { + return Err(ParseError::unexpected_token(other.clone(), *position)); } } } @@ -296,46 +306,51 @@ pub fn build_ast(tokens: &[BasicToken], config: &Config) -> Result { + Some((BasicToken::Comma, _)) => { tokens.take(1); } - Some(BasicToken::CloseParen) => break, - _ => return Err(ParseError::MissingToken(BasicToken::Comma)), + Some((BasicToken::CloseParen, _)) => break, + _ => { + return Err(ParseError::missing_token( + BasicToken::Comma, + last_position(&tokens), + )) + } } } - match tokens.take(1) { - [BasicToken::CloseParen] => {} - [other] => { - return Err(ParseError::UnexpectedToken(other.clone())); - } - _ => { - return Err(ParseError::MissingToken(BasicToken::CloseParen)); - } - } + expect_next_token(&tokens, &BasicToken::CloseParen)?; + tokens.take(1); + let span = position.until(last_position(&tokens)); let lowercase_fn_name = fn_name.to_lowercase(); if let Some(translation_fn) = config.special_functions.get(&lowercase_fn_name) { - instructions.push(translation_fn(arguments)?); + instructions.push(translation_fn(arguments, span)?); } else if let Some((_, mutating, n_args)) = config.builtin_functions.get(&lowercase_fn_name) { if *mutating { let BasicAstExpression::Variable(_) = &arguments[0] else { - return Err(ParseError::ExpectedVariable); + return Err(ParseError::new( + ParseErrorKind::ExpectedVariable, + last_position(&tokens), + )); }; } if arguments.len() != *n_args { - return Err(ParseError::InvalidArgumentCount( - lowercase_fn_name, - *n_args, - arguments.len(), + return Err(ParseError::new( + ParseErrorKind::InvalidArgumentCount( + lowercase_fn_name, + *n_args, + arguments.len(), + ), + span, )); } @@ -353,7 +368,7 @@ pub fn build_ast(tokens: &[BasicToken], config: &Config) -> Result Result 1 { + let position = last_position(&tokens).at_end(); + match &context_stack.last().unwrap().1 { Context::If(_) | Context::IfElse(_, _) => { - return Err(ParseError::MissingToken(BasicToken::EndIf)); + return Err(ParseError::missing_token(BasicToken::EndIf, position)); } Context::For(_, _, _, _) => { - return Err(ParseError::MissingToken(BasicToken::Next)); + return Err(ParseError::missing_token(BasicToken::Next, position)); } Context::While(_) => { - return Err(ParseError::MissingToken(BasicToken::Wend)); + return Err(ParseError::missing_token(BasicToken::Wend, position)); } Context::Do | Context::DoWhile(_) => { - return Err(ParseError::MissingToken(BasicToken::Loop)); + return Err(ParseError::missing_token(BasicToken::Loop, position)); } Context::Main => { unreachable!("There cannot be another context below the main context"); @@ -388,13 +405,19 @@ pub fn build_ast(tokens: &[BasicToken], config: &Config) -> Result Result { +fn find_token_index( + tokens: &[(BasicToken, Position)], + needle: BasicToken, +) -> Result { tokens .iter() .enumerate() - .find(|(_, t)| **t == needle) + .find(|(_index, (t, _pos))| *t == needle) .map(|(i, _)| i) - .ok_or(ParseError::MissingToken(needle)) + .ok_or(ParseError::missing_token( + needle, + tokens.last().map(|pair| pair.1).unwrap_or_default(), + )) } macro_rules! impl_op_basic_ast_expression { @@ -416,18 +439,23 @@ impl_op_basic_ast_expression!(std::ops::Mul, mul, Operator::Mul); impl_op_basic_ast_expression!(std::ops::Div, div, Operator::Div); pub(crate) fn parse_expression( - tokens: &mut Cursor<'_, BasicToken>, + tokens: &mut Cursor<'_, (BasicToken, Position)>, ) -> Result { /// Returns the first non-newline token in `tokens` - fn peek(tokens: &[BasicToken]) -> Option<&BasicToken> { - tokens.iter().find(|t| !matches!(t, BasicToken::NewLine)) + fn peek(tokens: &[(BasicToken, Position)]) -> Option<&BasicToken> { + tokens + .iter() + .find(|t| !matches!(t.0, BasicToken::NewLine)) + .map(|pair| &pair.0) } /// Parses a single expression item fn parse_expression_item( - tokens: &mut Cursor<'_, BasicToken>, + tokens: &mut Cursor<'_, (BasicToken, Position)>, ) -> Result { - match tokens.peek(2) { + let position = tokens.get(0).map(|pair| pair.1).unwrap_or_default(); + + match &drop_position(tokens.peek(2))[..] { [BasicToken::Integer(int), ..] => { tokens.take(1); Ok(BasicAstExpression::Integer(*int)) @@ -440,27 +468,31 @@ pub(crate) fn parse_expression( tokens.take(2); let fn_name_lowercase = fn_name.to_ascii_lowercase(); let mut arguments = Vec::new(); - while tokens.get(0) != Some(&BasicToken::CloseParen) { + while tokens.get(0).map(|pair| &pair.0) != Some(&BasicToken::CloseParen) { arguments.push(parse_expression(tokens)?); - match tokens.get(0) { + match tokens.get(0).map(|pair| &pair.0) { Some(BasicToken::Comma) => { tokens.take(1); } Some(BasicToken::CloseParen) => break, - _ => return Err(ParseError::MissingToken(BasicToken::Comma)), + _ => return Err(ParseError::missing_token(BasicToken::Comma, position)), } } expect_next_token(tokens, &BasicToken::CloseParen)?; tokens.take(1); + let span = position.until(last_position(tokens)); if let Ok(unary_operator) = UnaryOperator::try_from(fn_name_lowercase.as_str()) { if arguments.len() != 1 { - Err(ParseError::InvalidArgumentCount( - fn_name_lowercase, - 1, - arguments.len(), + Err(ParseError::new( + ParseErrorKind::InvalidArgumentCount( + fn_name_lowercase, + 1, + arguments.len(), + ), + span, )) } else { Ok(BasicAstExpression::Unary( @@ -472,10 +504,13 @@ pub(crate) fn parse_expression( BasicOperator::from_fn_name(fn_name_lowercase.as_str()) { if arguments.len() != 2 { - Err(ParseError::InvalidArgumentCount( - fn_name_lowercase, - 2, - arguments.len(), + Err(ParseError::new( + ParseErrorKind::InvalidArgumentCount( + fn_name_lowercase, + 2, + arguments.len(), + ), + span, )) } else { let mut iter = arguments.into_iter(); @@ -503,14 +538,20 @@ pub(crate) fn parse_expression( [BasicToken::OpenParen, ..] => { tokens.take(1); let res = parse_expression(tokens)?; - if let Some(BasicToken::CloseParen) = tokens.take(1).get(0) { + if let Some((BasicToken::CloseParen, _)) = tokens.take(1).get(0) { Ok(res) } else { - Err(ParseError::MissingToken(BasicToken::CloseParen)) + Err(ParseError::missing_token( + BasicToken::CloseParen, + position.at_end(), + )) } } - [first, ..] => Err(ParseError::UnexpectedToken(first.clone())), - [] => Err(ParseError::ExpectedOperand), + [first, ..] => Err(ParseError::unexpected_token((**first).clone(), position)), + [] => Err(ParseError::new( + ParseErrorKind::ExpectedOperand, + last_position(&tokens).at_end(), + )), } } @@ -519,7 +560,7 @@ pub(crate) fn parse_expression( /// /// See https://en.wikipedia.org/wiki/Operator-precedence_parser for more information fn parse_expression_main( - tokens: &mut Cursor<'_, BasicToken>, + tokens: &mut Cursor<'_, (BasicToken, Position)>, lhs: BasicAstExpression, min_precedence: u8, ) -> Result { @@ -552,12 +593,18 @@ pub(crate) fn parse_expression( } fn expect_next_token( - tokens: &Cursor<'_, BasicToken>, + tokens: &Cursor<'_, (BasicToken, Position)>, expected: &BasicToken, ) -> Result<(), ParseError> { match tokens.get(0) { - Some(token) if token == expected => Ok(()), - Some(token) => Err(ParseError::UnexpectedToken(token.clone())), - None => Err(ParseError::MissingToken(expected.clone())), + Some((token, _position)) if token == expected => Ok(()), + Some((token, position)) => Err(ParseError::new( + ParseErrorKind::UnexpectedToken(token.clone()), + *position, + )), + None => Err(ParseError::missing_token( + expected.clone(), + tokens.last().unwrap().1, + )), } } diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 5702580..e07015d 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -4,13 +4,88 @@ pub use tokenize::*; mod ast; pub use ast::*; -use crate::repr::basic::BasicAstExpression; +use crate::repr::{basic::BasicAstExpression, position::Position}; #[cfg(test)] mod test; +#[derive(Clone, Debug)] +pub struct ParseError { + pub kind: ParseErrorKind, + pub position: Position, +} + +impl ParseError { + pub fn new(kind: ParseErrorKind, position: Position) -> Self { + Self { kind, position } + } + + pub(crate) fn missing_token(token: BasicToken, position: Position) -> Self { + Self { + kind: ParseErrorKind::MissingToken(token), + position, + } + } + + pub(crate) fn unexpected_token(token: BasicToken, position: Position) -> ParseError { + Self { + kind: ParseErrorKind::UnexpectedToken(token), + position, + } + } + + pub fn display_panic(&self, source: &str) -> ! { + self.position + .underline(source, &mut std::io::stderr()) + .unwrap(); + + eprint!("ERROR "); + + match &self.kind { + ParseErrorKind::InvalidToken(line_rest) => { + eprintln!( + "Syntax error: '{}' is not a valid token", + line_rest + .split_ascii_whitespace() + .nth(0) + .unwrap_or(&line_rest) + ); + } + ParseErrorKind::UnexpectedToken(token) => { + eprintln!("Parse error: this token ({:?}) isn't allowed here", token); + } + ParseErrorKind::MissingToken(token) => { + eprintln!( + "Parse error: expected a token ({:?}), but it is missing", + token + ); + } + ParseErrorKind::InvalidArgumentCount(fn_name, expected, actual) => { + eprintln!("Invalid argument count: expected {} arguments to the function '{}', got {} arguments", expected, fn_name, actual); + } + ParseErrorKind::ExpectedVariable => { + eprintln!("This parameter expects a variable name"); + } + ParseErrorKind::ExpectedOperand => { + eprintln!("Expected an expression operand here (eg. a number, a variable or a subexpression)"); + } + ParseErrorKind::WrongForVariable(expected, actual) => { + eprintln!( + "Variable of NEXT ('{}') does not match the variable of FOR ('{}')", + actual, expected + ); + } + ParseErrorKind::InvalidArgument(expr) => { + eprintln!("{:?} is not a valid argument to this function", expr); + } + } + + std::process::exit(1); + } +} + #[derive(PartialEq, Clone, Debug)] -pub enum ParseError { +pub enum ParseErrorKind { InvalidToken(String), UnexpectedToken(BasicToken), MissingToken(BasicToken), diff --git a/src/parse/test.rs b/src/parse/test.rs index bf4ccbd..5043aed 100644 --- a/src/parse/test.rs +++ b/src/parse/test.rs @@ -1,10 +1,14 @@ use super::*; use crate::prelude::*; +fn test_drop_position(tokens: Result, ParseError>) -> Vec { + tokens.unwrap().into_iter().map(|pair| pair.0).collect() +} + #[test] fn test_tokenize_basic() { assert_eq!( - tokenize("hello + world").unwrap(), + test_drop_position(tokenize("hello + world")), vec![ BasicToken::NewLine, BasicToken::Name(String::from("hello")), @@ -14,7 +18,7 @@ fn test_tokenize_basic() { ); assert_eq!( - tokenize("let thing = thing / 2").unwrap(), + test_drop_position(tokenize("let thing = thing / 2")), vec![ BasicToken::NewLine, BasicToken::Name(String::from("thing")), @@ -26,7 +30,7 @@ fn test_tokenize_basic() { ); assert_eq!( - tokenize("10 thing = thing + 0.5\ngoto 10").unwrap(), + test_drop_position(tokenize("10 thing = thing + 0.5\ngoto 10")), vec![ BasicToken::NewLine, BasicToken::Integer(10), @@ -42,7 +46,9 @@ fn test_tokenize_basic() { ); assert_eq!( - tokenize("x = 0\n\nif x > 0 then\nprint(\"Positive\")\nend if").unwrap(), + test_drop_position(tokenize( + "x = 0\n\nif x > 0 then\nprint(\"Positive\")\nend if" + )), vec![ BasicToken::NewLine, BasicToken::Name(String::from("x")), @@ -65,7 +71,7 @@ fn test_tokenize_basic() { ); assert_eq!( - tokenize("if x > 0 then\nend\nend if").unwrap(), + test_drop_position(tokenize("if x > 0 then\nend\nend if")), vec![ BasicToken::NewLine, BasicToken::If, @@ -84,7 +90,7 @@ fn test_tokenize_basic() { #[test] fn test_parse_for() { assert_eq!( - tokenize("FOR x = 0 TO y\nPRINT x\nNEXT x").unwrap(), + test_drop_position(tokenize("FOR x = 0 TO y\nPRINT x\nNEXT x")), vec![ BasicToken::NewLine, BasicToken::For, @@ -124,11 +130,25 @@ fn test_parse_for() { #[test] fn test_operator_precedence() { fn test_parse(list: [BasicToken; N]) -> BasicAstExpression { - parse_expression(&mut Cursor::from(&list)).unwrap() + parse_expression(&mut Cursor::from( + &list + .into_iter() + .map(|token| (token, Position::default())) + .collect::>()[..], + )) + .unwrap() } - fn test_err(list: [BasicToken; N]) -> ParseError { - parse_expression(&mut Cursor::from(&list)).err().unwrap() + fn test_err(list: [BasicToken; N]) -> ParseErrorKind { + parse_expression(&mut Cursor::from( + &list + .into_iter() + .map(|token| (token, Position::default())) + .collect::>()[..], + )) + .err() + .unwrap() + .kind } assert_eq!( @@ -242,7 +262,7 @@ fn test_operator_precedence() { BasicToken::Name(String::from("hello")), BasicToken::Operator(Operator::Add.into()), ]), - ParseError::ExpectedOperand + ParseErrorKind::ExpectedOperand ); assert_eq!( @@ -254,7 +274,7 @@ fn test_operator_precedence() { BasicToken::Operator(Operator::Mul.into()), BasicToken::Integer(2), ]), - ParseError::MissingToken(BasicToken::CloseParen) + ParseErrorKind::MissingToken(BasicToken::CloseParen) ); assert_eq!( @@ -263,7 +283,7 @@ fn test_operator_precedence() { BasicToken::Operator(Operator::Add.into()), BasicToken::Operator(Operator::Mul.into()), ]), - ParseError::UnexpectedToken(BasicToken::Operator(Operator::Mul.into())) + ParseErrorKind::UnexpectedToken(BasicToken::Operator(Operator::Mul.into())) ); assert!(matches!( @@ -272,7 +292,7 @@ fn test_operator_precedence() { BasicToken::Operator(Operator::Add.into()), BasicToken::OpenParen, ]), - ParseError::ExpectedOperand | ParseError::MissingToken(BasicToken::CloseParen) + ParseErrorKind::ExpectedOperand | ParseErrorKind::MissingToken(BasicToken::CloseParen) )); assert!(matches!( @@ -282,7 +302,7 @@ fn test_operator_precedence() { BasicToken::OpenParen, BasicToken::CloseParen ]), - ParseError::ExpectedOperand | ParseError::UnexpectedToken(BasicToken::CloseParen) + ParseErrorKind::ExpectedOperand | ParseErrorKind::UnexpectedToken(BasicToken::CloseParen) )); assert_eq!( @@ -290,7 +310,7 @@ fn test_operator_precedence() { BasicToken::Operator(Operator::Add.into()), BasicToken::Integer(2) ]), - ParseError::UnexpectedToken(BasicToken::Operator(Operator::Add.into())) + ParseErrorKind::UnexpectedToken(BasicToken::Operator(Operator::Add.into())) ); } diff --git a/src/parse/tokenize.rs b/src/parse/tokenize.rs index 25c9b76..0611ede 100644 --- a/src/parse/tokenize.rs +++ b/src/parse/tokenize.rs @@ -1,5 +1,5 @@ use super::ParseError; -use crate::prelude::*; +use crate::{parse::ParseErrorKind, prelude::*}; use regex::Regex; #[derive(PartialEq, Clone, Debug)] @@ -35,11 +35,11 @@ pub enum BasicToken { } /// Transforms a raw string into a sequence of `BasicToken`s -pub fn tokenize(raw: &str) -> Result, ParseError> { +pub fn tokenize(raw: &str) -> Result, ParseError> { macro_rules! match_token { - ( $line:expr, $res:expr $(;)? ) => {}; + ( $line:expr, $res:expr, $line_index:expr, $ch:ident $(;)? ) => {}; ( - $line:expr, $res:expr; + $line:expr, $res:expr, $line_index:expr, $ch:ident; $matcher:ident => (), $( $rest_matcher:ident $(($rest_match_name:ident))? => $rest_value:tt, @@ -47,17 +47,18 @@ pub fn tokenize(raw: &str) -> Result, ParseError> { ) => { if let Some(matched) = $matcher.find($line) { $line = &$line[matched.end()..]; + $ch += matched.len(); continue } match_token!( - $line, $res; + $line, $res, $line_index, $ch; $( $rest_matcher $(($rest_match_name))? => $rest_value, )* ); }; ( - $line:expr, $res:expr; + $line:expr, $res:expr, $line_index:expr, $ch:ident; $matcher:ident $(($match_name:ident))? => $value:expr, $( $rest_matcher:ident $(($rest_match_name:ident))? => $rest_value:tt, @@ -66,11 +67,13 @@ pub fn tokenize(raw: &str) -> Result, ParseError> { if let Some(matched) = $matcher.find($line) { $line = &$line[matched.end()..]; $(let $match_name = matched.as_str();)? - $res.push($value); + let len = matched.len(); + $res.push(($value, Position::span_ch($line_index, $ch, len))); + $ch += len; continue } match_token!( - $line, $res; + $line, $res, $line_index, $ch; $( $rest_matcher $(($rest_match_name))? => $rest_value, )* @@ -78,7 +81,7 @@ pub fn tokenize(raw: &str) -> Result, ParseError> { } } - let mut res = Vec::new(); + let mut res: Vec<(BasicToken, Position)> = Vec::new(); let match_let = Regex::new(r"(?i)^let").unwrap(); let match_jump = Regex::new(r"(?i)^go\s*to").unwrap(); let match_word = @@ -98,13 +101,14 @@ pub fn tokenize(raw: &str) -> Result, ParseError> { let match_comment = Regex::new(r"(?i)^rem\s.*$").unwrap(); // TODO: handle labels - for mut line in raw.lines() { + for (line_index, mut line) in raw.lines().enumerate() { + let mut ch = 0; if !line.is_empty() { - res.push(BasicToken::NewLine); + res.push((BasicToken::NewLine, Position::point(line_index, 0))); } while !line.is_empty() { // Main match clause for tokens - match_token!(line, res; + match_token!(line, res, line_index, ch; match_space => (), match_let => (), match_comment => (), @@ -166,7 +170,10 @@ pub fn tokenize(raw: &str) -> Result, ParseError> { ); // If this line is reached, then none of the matches above matched - return Err(ParseError::InvalidToken(line.to_string())); + return Err(ParseError::new( + ParseErrorKind::InvalidToken(line.to_string()), + Position::point(line_index, ch), + )); } } diff --git a/src/repr/mod.rs b/src/repr/mod.rs index 0ef22ad..05422e0 100644 --- a/src/repr/mod.rs +++ b/src/repr/mod.rs @@ -1,3 +1,4 @@ pub mod basic; pub mod mlog; pub mod operator; +pub mod position; diff --git a/src/repr/position.rs b/src/repr/position.rs new file mode 100644 index 0000000..76b4c91 --- /dev/null +++ b/src/repr/position.rs @@ -0,0 +1,124 @@ +use crate::cursor::Cursor; + +#[derive(Clone, Debug, PartialEq, Copy, Default)] +pub struct Position { + start: (usize, usize), + end: Option<(usize, usize)>, +} + +const MARGIN: usize = 2; + +impl Position { + pub fn point(line: usize, ch: usize) -> Self { + Self { + start: (line, ch), + end: None, + } + } + + pub fn span_ch(line: usize, ch: usize, len: usize) -> Self { + Self { + start: (line, ch), + end: Some((line, ch + len)), + } + } + + pub fn underline( + self, + raw: &str, + stream: &mut W, + ) -> Result<(), std::io::Error> { + let start_line = self.start.0; + let end_line = self.end.map(|pair| pair.0).unwrap_or(start_line); + + for (line_index, line) in raw.lines().enumerate() { + if line_index + MARGIN < start_line || line_index > end_line + MARGIN { + continue; + } + + if line_index >= start_line && line_index <= end_line { + writeln!(stream, "> {}", line)?; + + if start_line == end_line { + let len = if let Some((_, end_ch)) = self.end { + end_ch - self.start.1 + } else { + 0 + }; + + write!(stream, " ")?; + for _ in 0..self.start.1 { + write!(stream, " ")?; + } + write!(stream, "^")?; + if len > 1 { + for _ in 1..len { + write!(stream, "~")?; + } + } + writeln!(stream,)?; + } else { + if line_index == start_line { + write!(stream, " ")?; + for _ in 0..self.start.1 { + write!(stream, " ")?; + } + for _ in self.start.1..line.len() { + write!(stream, "~")?; + } + writeln!(stream,)?; + } else if line_index == end_line { + write!(stream, " ")?; + for _ in 0..self.end.unwrap().1 { + write!(stream, "~")?; + } + writeln!(stream,)?; + } else { + write!(stream, " ")?; + for _ in 0..line.len() { + write!(stream, "~")?; + } + writeln!(stream,)?; + } + } + } else { + writeln!(stream, "* {}", line)?; + } + } + + Ok(()) + } + + /// Returns the position pointing to the end of the current spanning position + pub fn at_end(&self) -> Self { + Self { + start: self.end.unwrap_or(self.start), + end: None, + } + } + + /// Returns a position spanning from the beginning of `self` to the end of `other` + pub fn until(&self, other: Self) -> Self { + Self { + start: self.start, + end: Some(other.end.unwrap_or(other.start)), + } + } +} + +pub(crate) fn drop_position(list: &[(T, Position)]) -> Vec<&T> { + list.iter().map(|(lhs, _rhs)| lhs).collect() +} + +pub(crate) fn last_position(cursor: &Cursor<'_, (T, Position)>) -> Position { + let data = cursor.full_data(); + let offset = cursor.offset(); + + match data.get(offset.saturating_sub(1)) { + Some((_, position)) => *position, + None => data + .last() + .map(|pair| pair.1) + .unwrap_or(Position::default()), + } +}