Fix parsing of negative int patterns and constants
This was trickier than expected: the expression parser, and in particular the bin-op parser, interprets a negative pattern as the continuation of a binary operation and eventually chokes on the next right-arrow symbol. For example, given the clause '42 -> -14' followed on the next line by '-42 -> 14', the leading '-42' is read as a subtraction from '-14' rather than as the start of a new pattern. This happens because we completely erase newlines once we're done with the lexer, even though the newline separating the clauses of a 'when' expression is semantically important. In principle, we could parse an expression only up to the next newline. Ideally, we would keep that newline in the list of tokens, but it's difficult to figure out which newline to keep between two right arrows since a clause guard can be written over multiple lines. However, since we know that this is only truly a problem for negative integers, we can use the same trick as for tuples and define a new 'NewLineMinus' token. That token CANNOT be part of a binop expression. That means it's impossible to write a binary operation with a minus over multiple lines or, more specifically, with the '-' symbol at the start of a new line. This sounds like a fair limitation. What we get in exchange is less ambiguity when parsing patterns that follow expressions in 'when' clauses. Another, more cumbersome, option would be to preserve the first newline encountered after a 'right-arrow' symbol and before any parenthesis or curly brace is found (which would otherwise signal the beginning of a new block). That requires traversing the list of tokens, at least partially, twice. This feels unnecessary for now, at least until we face a similar issue with another binary operator.
This commit is contained in:
parent
346df47232
commit
78d34f7f76
|
@ -2,7 +2,7 @@ use chumsky::prelude::*;
|
|||
|
||||
use crate::{
|
||||
ast,
|
||||
parser::{annotation, error::ParseError, literal::bytearray, token::Token, utils},
|
||||
parser::{annotation, error::ParseError, literal, token::Token, utils},
|
||||
};
|
||||
|
||||
pub fn parser() -> impl Parser<Token, ast::UntypedDefinition, Error = ParseError> {
|
||||
|
@ -39,20 +39,20 @@ pub fn value() -> impl Parser<Token, ast::Constant, Error = ParseError> {
|
|||
});
|
||||
|
||||
let constant_int_parser =
|
||||
select! {Token::Int {value, base} => (value, base)}.map_with_span(|(value, base), span| {
|
||||
ast::Constant::Int {
|
||||
location: span,
|
||||
value,
|
||||
base,
|
||||
}
|
||||
literal::int().map_with_span(|(value, base), location| ast::Constant::Int {
|
||||
location,
|
||||
value,
|
||||
base,
|
||||
});
|
||||
|
||||
let constant_bytearray_parser =
|
||||
bytearray(|bytes, preferred_format, span| ast::Constant::ByteArray {
|
||||
location: span,
|
||||
bytes,
|
||||
preferred_format,
|
||||
});
|
||||
literal::bytearray(
|
||||
|bytes, preferred_format, location| ast::Constant::ByteArray {
|
||||
location,
|
||||
bytes,
|
||||
preferred_format,
|
||||
},
|
||||
);
|
||||
|
||||
choice((
|
||||
constant_string_parser,
|
||||
|
|
|
@ -6,14 +6,14 @@ use crate::{
|
|||
};
|
||||
|
||||
pub fn parser(
|
||||
r: Recursive<'_, Token, UntypedExpr, ParseError>,
|
||||
expression: Recursive<'_, Token, UntypedExpr, ParseError>,
|
||||
) -> impl Parser<Token, UntypedExpr, Error = ParseError> + '_ {
|
||||
just(Token::LeftSquare)
|
||||
.ignore_then(r.clone().separated_by(just(Token::Comma)))
|
||||
.ignore_then(expression.clone().separated_by(just(Token::Comma)))
|
||||
.then(choice((
|
||||
just(Token::Comma).ignore_then(
|
||||
just(Token::DotDot)
|
||||
.ignore_then(r.clone())
|
||||
.ignore_then(expression)
|
||||
.map(Box::new)
|
||||
.or_not(),
|
||||
),
|
||||
|
|
|
@ -56,7 +56,7 @@ pub fn pure_expression<'a>(
|
|||
// Negate
|
||||
let op = choice((
|
||||
just(Token::Bang).to(ast::UnOp::Not),
|
||||
just(Token::Minus)
|
||||
choice((just(Token::Minus), just(Token::NewLineMinus)))
|
||||
// NOTE: Prevent conflict with usage for '-' as a standalone binary op.
|
||||
// This will make '-' parse when used as standalone binop in a function call.
|
||||
// For example:
|
||||
|
|
|
@ -12,15 +12,15 @@ use crate::{
|
|||
};
|
||||
|
||||
pub fn parser(
|
||||
r: Recursive<'_, Token, UntypedExpr, ParseError>,
|
||||
expression: Recursive<'_, Token, UntypedExpr, ParseError>,
|
||||
) -> impl Parser<Token, UntypedExpr, Error = ParseError> + '_ {
|
||||
just(Token::When)
|
||||
// TODO: If subject is empty we should return ParseErrorType::ExpectedExpr,
|
||||
.ignore_then(r.clone().map(Box::new))
|
||||
.ignore_then(expression.clone().map(Box::new))
|
||||
.then_ignore(just(Token::Is))
|
||||
.then_ignore(just(Token::LeftBrace))
|
||||
// TODO: If clauses are empty we should return ParseErrorType::NoCaseClause
|
||||
.then(clause(r).repeated())
|
||||
.then(clause(expression).repeated())
|
||||
.then_ignore(just(Token::RightBrace))
|
||||
.map_with_span(|(subject, clauses), span| UntypedExpr::When {
|
||||
location: span,
|
||||
|
|
|
@ -59,6 +59,13 @@ pub fn run(src: &str) -> Result<LexInfo, Vec<ParseError>> {
|
|||
Some((Token::LeftParen, *span))
|
||||
}
|
||||
}
|
||||
Token::Minus => {
|
||||
if previous_is_newline {
|
||||
Some((Token::NewLineMinus, *span))
|
||||
} else {
|
||||
Some((Token::Minus, *span))
|
||||
}
|
||||
}
|
||||
Token::Pipe => {
|
||||
if previous_is_newline {
|
||||
Some((Token::NewLinePipe, *span))
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
use chumsky::prelude::*;
|
||||
|
||||
use crate::parser::{
|
||||
error::ParseError,
|
||||
token::{Base, Token},
|
||||
};
|
||||
|
||||
/// Parses an integer literal with an optional leading minus sign.
///
/// The sign may be either `Token::Minus` or `Token::NewLineMinus`. On success
/// the parser yields the literal's string value, prefixed with `-` when a sign
/// token was consumed, together with the `Base` carried by the `Token::Int`.
pub fn parser() -> impl Parser<Token, (String, Base), Error = ParseError> {
|
||||
// Both minus tokens are accepted as the sign of the literal.
choice((just(Token::NewLineMinus), just(Token::Minus)))
|
||||
.ignored()
|
||||
.or_not()
|
||||
// Collapse the optional sign into a boolean flag.
.map(|v| v.is_some())
|
||||
.then(select! { Token::Int {value, base} => (value, base)})
|
||||
.map(|(is_negative, (value, base))| {
|
||||
if is_negative {
|
||||
// Negativity is encoded in the string value itself; the base is left untouched.
(format!("-{value}"), base)
|
||||
} else {
|
||||
(value, base)
|
||||
}
|
||||
})
|
||||
}
|
|
@ -1,7 +1,9 @@
|
|||
mod bytearray;
|
||||
mod int;
|
||||
mod string;
|
||||
mod uint;
|
||||
|
||||
pub use bytearray::{array_of_bytes, hex_string, parser as bytearray, utf8_string};
|
||||
pub use int::parser as int;
|
||||
pub use string::parser as string;
|
||||
pub use uint::parser as uint;
|
||||
|
|
|
@ -2,15 +2,41 @@ use chumsky::prelude::*;
|
|||
|
||||
use crate::{
|
||||
ast::UntypedPattern,
|
||||
parser::{error::ParseError, token::Token},
|
||||
parser::{error::ParseError, literal, token::Token},
|
||||
};
|
||||
|
||||
pub fn parser() -> impl Parser<Token, UntypedPattern, Error = ParseError> {
|
||||
select! {Token::Int {value, base} => (value, base)}.map_with_span(|(value, base), location| {
|
||||
UntypedPattern::Int {
|
||||
location,
|
||||
value,
|
||||
base,
|
||||
}
|
||||
literal::int().map_with_span(|(value, base), location| UntypedPattern::Int {
|
||||
location,
|
||||
value,
|
||||
base,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::assert_expr;
|
||||
|
||||
#[test]
|
||||
fn pattern_negative_int() {
|
||||
assert_expr!(
|
||||
r#"
|
||||
when foo is {
|
||||
-1 -> True
|
||||
}
|
||||
"#
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pattern_negative_int_not_first_case() {
|
||||
assert_expr!(
|
||||
r#"
|
||||
when bar is {
|
||||
42 -> -14
|
||||
-42 -> 14
|
||||
}
|
||||
"#
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
---
|
||||
source: crates/aiken-lang/src/parser/pattern/int.rs
|
||||
description: "Code:\n\nwhen foo is {\n -1 -> True\n}\n"
|
||||
---
|
||||
When {
|
||||
location: 0..28,
|
||||
subject: Var {
|
||||
location: 5..8,
|
||||
name: "foo",
|
||||
},
|
||||
clauses: [
|
||||
UntypedClause {
|
||||
location: 16..26,
|
||||
patterns: [
|
||||
Int {
|
||||
location: 16..18,
|
||||
value: "-1",
|
||||
base: Decimal {
|
||||
numeric_underscore: false,
|
||||
},
|
||||
},
|
||||
],
|
||||
guard: None,
|
||||
then: Var {
|
||||
location: 22..26,
|
||||
name: "True",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
---
|
||||
source: crates/aiken-lang/src/parser/pattern/int.rs
|
||||
description: "Code:\n\nwhen bar is {\n 42 -> -14\n -42 -> 14\n}\n"
|
||||
---
|
||||
When {
|
||||
location: 0..39,
|
||||
subject: Var {
|
||||
location: 5..8,
|
||||
name: "bar",
|
||||
},
|
||||
clauses: [
|
||||
UntypedClause {
|
||||
location: 16..25,
|
||||
patterns: [
|
||||
Int {
|
||||
location: 16..18,
|
||||
value: "42",
|
||||
base: Decimal {
|
||||
numeric_underscore: false,
|
||||
},
|
||||
},
|
||||
],
|
||||
guard: None,
|
||||
then: UnOp {
|
||||
op: Negate,
|
||||
location: 22..25,
|
||||
value: UInt {
|
||||
location: 23..25,
|
||||
value: "14",
|
||||
base: Decimal {
|
||||
numeric_underscore: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
UntypedClause {
|
||||
location: 28..37,
|
||||
patterns: [
|
||||
Int {
|
||||
location: 28..31,
|
||||
value: "-42",
|
||||
base: Decimal {
|
||||
numeric_underscore: false,
|
||||
},
|
||||
},
|
||||
],
|
||||
guard: None,
|
||||
then: UInt {
|
||||
location: 35..37,
|
||||
value: "14",
|
||||
base: Decimal {
|
||||
numeric_underscore: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
|
@ -27,6 +27,7 @@ pub enum Token {
|
|||
// Int Operators
|
||||
Plus,
|
||||
Minus,
|
||||
NewLineMinus,
|
||||
Star,
|
||||
Slash,
|
||||
Less,
|
||||
|
@ -115,6 +116,7 @@ impl fmt::Display for Token {
|
|||
Token::RightBrace => "}",
|
||||
Token::Plus => "+",
|
||||
Token::Minus => "-",
|
||||
Token::NewLineMinus => "↳-",
|
||||
Token::Star => "*",
|
||||
Token::Slash => "/",
|
||||
Token::Less => "<",
|
||||
|
|
Loading…
Reference in New Issue