Fix parsing of negative int patterns and constants

This was trickier than expected because the expression parser, and in particular the bin-op parser, will interpret a negative pattern as the continuation of a binary operation and eventually choke on the next right-arrow symbol. This is due to how we completely erase newlines once we're done with the lexer, even though the newline separating two when clauses is semantically important. In principle, we could parse an expression only up to the next newline.

  Ideally, we would keep that newline in the list of tokens, but it's difficult to figure out which newline to keep between two right arrows since a clause guard can be written over multiple lines. However, since we know that this is only truly a problem for negative integers, we can use the same trick as for tuples and define a new 'NewLineMinus' token. That token CANNOT be part of a binop expression. That means it's impossible to write a binary operation with a minus over multiple lines, or more specifically, with the '-' symbol at the start of a new line. This sounds like a fair limitation. What we get in exchange is less ambiguity when parsing patterns that follow expressions in when clauses.

  Another, more cumbersome option would be to preserve the first newline encountered after a 'right-arrow' symbol and before any parenthesis or curly brace is found (which would otherwise signal the beginning of a new block). That requires traversing the list of tokens, at least partially, twice. This feels unnecessary for now, at least until we face a similar issue with another binary operator.
This commit is contained in:
KtorZ 2023-07-06 19:16:33 +02:00 committed by Lucas
parent 346df47232
commit 78d34f7f76
11 changed files with 171 additions and 26 deletions

View File

@ -2,7 +2,7 @@ use chumsky::prelude::*;
use crate::{
ast,
parser::{annotation, error::ParseError, literal::bytearray, token::Token, utils},
parser::{annotation, error::ParseError, literal, token::Token, utils},
};
pub fn parser() -> impl Parser<Token, ast::UntypedDefinition, Error = ParseError> {
@ -39,20 +39,20 @@ pub fn value() -> impl Parser<Token, ast::Constant, Error = ParseError> {
});
let constant_int_parser =
select! {Token::Int {value, base} => (value, base)}.map_with_span(|(value, base), span| {
ast::Constant::Int {
location: span,
literal::int().map_with_span(|(value, base), location| ast::Constant::Int {
location,
value,
base,
}
});
let constant_bytearray_parser =
bytearray(|bytes, preferred_format, span| ast::Constant::ByteArray {
location: span,
literal::bytearray(
|bytes, preferred_format, location| ast::Constant::ByteArray {
location,
bytes,
preferred_format,
});
},
);
choice((
constant_string_parser,

View File

@ -6,14 +6,14 @@ use crate::{
};
pub fn parser(
r: Recursive<'_, Token, UntypedExpr, ParseError>,
expression: Recursive<'_, Token, UntypedExpr, ParseError>,
) -> impl Parser<Token, UntypedExpr, Error = ParseError> + '_ {
just(Token::LeftSquare)
.ignore_then(r.clone().separated_by(just(Token::Comma)))
.ignore_then(expression.clone().separated_by(just(Token::Comma)))
.then(choice((
just(Token::Comma).ignore_then(
just(Token::DotDot)
.ignore_then(r.clone())
.ignore_then(expression)
.map(Box::new)
.or_not(),
),

View File

@ -56,7 +56,7 @@ pub fn pure_expression<'a>(
// Negate
let op = choice((
just(Token::Bang).to(ast::UnOp::Not),
just(Token::Minus)
choice((just(Token::Minus), just(Token::NewLineMinus)))
// NOTE: Prevent conflict with usage for '-' as a standalone binary op.
// This will make '-' parse when used as standalone binop in a function call.
// For example:

View File

@ -12,15 +12,15 @@ use crate::{
};
pub fn parser(
r: Recursive<'_, Token, UntypedExpr, ParseError>,
expression: Recursive<'_, Token, UntypedExpr, ParseError>,
) -> impl Parser<Token, UntypedExpr, Error = ParseError> + '_ {
just(Token::When)
// TODO: If subject is empty we should return ParseErrorType::ExpectedExpr,
.ignore_then(r.clone().map(Box::new))
.ignore_then(expression.clone().map(Box::new))
.then_ignore(just(Token::Is))
.then_ignore(just(Token::LeftBrace))
// TODO: If clauses are empty we should return ParseErrorType::NoCaseClause
.then(clause(r).repeated())
.then(clause(expression).repeated())
.then_ignore(just(Token::RightBrace))
.map_with_span(|(subject, clauses), span| UntypedExpr::When {
location: span,

View File

@ -59,6 +59,13 @@ pub fn run(src: &str) -> Result<LexInfo, Vec<ParseError>> {
Some((Token::LeftParen, *span))
}
}
Token::Minus => {
if previous_is_newline {
Some((Token::NewLineMinus, *span))
} else {
Some((Token::Minus, *span))
}
}
Token::Pipe => {
if previous_is_newline {
Some((Token::NewLinePipe, *span))

View File

@ -0,0 +1,21 @@
use chumsky::prelude::*;
use crate::parser::{
error::ParseError,
token::{Base, Token},
};
/// Parses an integer literal with an optional leading minus sign.
///
/// The sign may be either a regular `Token::Minus` or a `Token::NewLineMinus`.
/// Both are accepted here so that a negative literal is recognised whichever
/// of the two tokens precedes the digits.
///
/// Returns the literal's textual value together with its `Base`. When a sign
/// token was consumed, the returned value is prefixed with `-`; otherwise the
/// value carried by the `Token::Int` is returned untouched.
pub fn parser() -> impl Parser<Token, (String, Base), Error = ParseError> {
    // Optional sign: `None` when the literal is not preceded by a minus token.
    let sign = choice((just(Token::NewLineMinus), just(Token::Minus))).or_not();

    // The unsigned digits and their base, as carried by the `Int` token.
    let unsigned = select! { Token::Int {value, base} => (value, base)};

    sign.then(unsigned).map(|(sign, (value, base))| match sign {
        Some(_) => (format!("-{value}"), base),
        None => (value, base),
    })
}

View File

@ -1,7 +1,9 @@
mod bytearray;
mod int;
mod string;
mod uint;
pub use bytearray::{array_of_bytes, hex_string, parser as bytearray, utf8_string};
pub use int::parser as int;
pub use string::parser as string;
pub use uint::parser as uint;

View File

@ -2,15 +2,41 @@ use chumsky::prelude::*;
use crate::{
ast::UntypedPattern,
parser::{error::ParseError, token::Token},
parser::{error::ParseError, literal, token::Token},
};
pub fn parser() -> impl Parser<Token, UntypedPattern, Error = ParseError> {
select! {Token::Int {value, base} => (value, base)}.map_with_span(|(value, base), location| {
UntypedPattern::Int {
literal::int().map_with_span(|(value, base), location| UntypedPattern::Int {
location,
value,
base,
}
})
}
// Tests for negative integer patterns inside `when` expressions.
// NOTE(review): `assert_expr!` presumably parses the snippet and compares the
// resulting AST against a stored snapshot — confirm against the macro definition.
#[cfg(test)]
mod tests {
use crate::assert_expr;
// A negative integer used as the pattern of the first (and only) clause.
#[test]
fn pattern_negative_int() {
assert_expr!(
r#"
when foo is {
-1 -> True
}
"#
);
}
// A negative integer pattern in a clause that follows another clause whose
// body is itself a negated integer: the `-42` starting the second line must
// be read as a new pattern rather than as a continuation of `-14`.
#[test]
fn pattern_negative_int_not_first_case() {
assert_expr!(
r#"
when bar is {
42 -> -14
-42 -> 14
}
"#
);
}
}

View File

@ -0,0 +1,30 @@
---
source: crates/aiken-lang/src/parser/pattern/int.rs
description: "Code:\n\nwhen foo is {\n -1 -> True\n}\n"
---
When {
location: 0..28,
subject: Var {
location: 5..8,
name: "foo",
},
clauses: [
UntypedClause {
location: 16..26,
patterns: [
Int {
location: 16..18,
value: "-1",
base: Decimal {
numeric_underscore: false,
},
},
],
guard: None,
then: Var {
location: 22..26,
name: "True",
},
},
],
}

View File

@ -0,0 +1,57 @@
---
source: crates/aiken-lang/src/parser/pattern/int.rs
description: "Code:\n\nwhen bar is {\n 42 -> -14\n -42 -> 14\n}\n"
---
When {
location: 0..39,
subject: Var {
location: 5..8,
name: "bar",
},
clauses: [
UntypedClause {
location: 16..25,
patterns: [
Int {
location: 16..18,
value: "42",
base: Decimal {
numeric_underscore: false,
},
},
],
guard: None,
then: UnOp {
op: Negate,
location: 22..25,
value: UInt {
location: 23..25,
value: "14",
base: Decimal {
numeric_underscore: false,
},
},
},
},
UntypedClause {
location: 28..37,
patterns: [
Int {
location: 28..31,
value: "-42",
base: Decimal {
numeric_underscore: false,
},
},
],
guard: None,
then: UInt {
location: 35..37,
value: "14",
base: Decimal {
numeric_underscore: false,
},
},
},
],
}

View File

@ -27,6 +27,7 @@ pub enum Token {
// Int Operators
Plus,
Minus,
NewLineMinus,
Star,
Slash,
Less,
@ -115,6 +116,7 @@ impl fmt::Display for Token {
Token::RightBrace => "}",
Token::Plus => "+",
Token::Minus => "-",
Token::NewLineMinus => "↳-",
Token::Star => "*",
Token::Slash => "/",
Token::Less => "<",