diff --git a/crates/aiken-lang/src/parser.rs b/crates/aiken-lang/src/parser.rs index 78c1e87c..d4af7017 100644 --- a/crates/aiken-lang/src/parser.rs +++ b/crates/aiken-lang/src/parser.rs @@ -4,6 +4,7 @@ pub mod error; pub mod expr; pub mod extra; pub mod lexer; +pub mod literal; pub mod pattern; pub mod token; mod utils; diff --git a/crates/aiken-lang/src/parser/definition/constant.rs b/crates/aiken-lang/src/parser/definition/constant.rs index 8963707b..1e2629ed 100644 --- a/crates/aiken-lang/src/parser/definition/constant.rs +++ b/crates/aiken-lang/src/parser/definition/constant.rs @@ -2,7 +2,9 @@ use chumsky::prelude::*; use crate::{ ast, - parser::{annotation, error::ParseError, expr::bytearray::bytearray, token::Token, utils}, + parser::{ + annotation, error::ParseError, literal::bytearray::parser as bytearray, token::Token, utils, + }, }; pub fn parser() -> impl Parser { @@ -48,7 +50,7 @@ pub fn value() -> impl Parser { }); let constant_bytearray_parser = - bytearray().map_with_span(|(preferred_format, bytes), span| ast::Constant::ByteArray { + bytearray(|bytes, preferred_format, span| ast::Constant::ByteArray { location: span, bytes, preferred_format, diff --git a/crates/aiken-lang/src/parser/expr/bytearray.rs b/crates/aiken-lang/src/parser/expr/bytearray.rs index 0b46c4db..2829b8dc 100644 --- a/crates/aiken-lang/src/parser/expr/bytearray.rs +++ b/crates/aiken-lang/src/parser/expr/bytearray.rs @@ -1,98 +1,17 @@ use chumsky::prelude::*; -use crate::{ - ast, - expr::UntypedExpr, - parser::{ - error::{self, ParseError}, - token::{Base, Token}, - }, +use crate::parser::{ + error::ParseError, expr::UntypedExpr, literal::bytearray::parser as bytearray, token::Token, }; pub fn parser() -> impl Parser { - bytearray().map_with_span(|(preferred_format, bytes), span| UntypedExpr::ByteArray { - location: span, + bytearray(|bytes, preferred_format, location| UntypedExpr::ByteArray { + location, bytes, preferred_format, }) } -pub fn bytearray( -) -> impl Parser), Error = ParseError> { - choice((array_of_bytes(), hex_string(), utf8_string())) -} - -pub fn array_of_bytes( -) -> impl Parser), Error = ParseError> { - just(Token::Hash) - .ignore_then( - select! {Token::Int {value, base, ..} => (value, base)} - .validate(|(value, base), span, emit| { - let byte: u8 = match value.parse() { - Ok(b) => b, - Err(_) => { - emit(ParseError::expected_input_found( - span, - None, - Some(error::Pattern::Byte), - )); - 0 - } - }; - (byte, base) - }) - .separated_by(just(Token::Comma)) - .allow_trailing() - .delimited_by(just(Token::LeftSquare), just(Token::RightSquare)), - ) - .validate(|bytes, span, emit| { - let base = bytes.iter().fold(Ok(None), |acc, (_, base)| match acc { - Ok(None) => Ok(Some(base)), - Ok(Some(previous_base)) if previous_base == base => Ok(Some(base)), - _ => Err(()), - }); - - let base = match base { - Err(()) => { - emit(ParseError::hybrid_notation_in_bytearray(span)); - Base::Decimal { - numeric_underscore: false, - } - } - Ok(None) => Base::Decimal { - numeric_underscore: false, - }, - Ok(Some(base)) => *base, - }; - - (bytes.into_iter().map(|(b, _)| b).collect::>(), base) - }) - .map(|(bytes, base)| (ast::ByteArrayFormatPreference::ArrayOfBytes(base), bytes)) -} - -pub fn hex_string( -) -> impl Parser), Error = ParseError> { - just(Token::Hash) - .ignore_then( - select! {Token::ByteString {value} => value}.validate(|value, span, emit| { - match hex::decode(value) { - Ok(bytes) => bytes, - Err(_) => { - emit(ParseError::malformed_base16_string_literal(span)); - vec![] - } - } - }), - ) - .map(|token| (ast::ByteArrayFormatPreference::HexadecimalString, token)) -} - -pub fn utf8_string( -) -> impl Parser), Error = ParseError> { - select! {Token::ByteString {value} => value.into_bytes() } - .map(|token| (ast::ByteArrayFormatPreference::Utf8String, token)) -} - #[cfg(test)] mod tests { use crate::assert_expr; diff --git a/crates/aiken-lang/src/parser/expr/int.rs b/crates/aiken-lang/src/parser/expr/int.rs index 6e4f628c..5ed56f13 100644 --- a/crates/aiken-lang/src/parser/expr/int.rs +++ b/crates/aiken-lang/src/parser/expr/int.rs @@ -2,16 +2,14 @@ use chumsky::prelude::*; use crate::{ expr::UntypedExpr, - parser::{error::ParseError, token::Token}, + parser::{error::ParseError, literal::int::parser as int, token::Token}, }; pub fn parser() -> impl Parser { - select! { Token::Int {value, base} => (value, base)}.map_with_span(|(value, base), span| { - UntypedExpr::Int { - location: span, - value, - base, - } + int().map_with_span(|(value, base), span| UntypedExpr::Int { + location: span, + value, + base, }) } diff --git a/crates/aiken-lang/src/parser/expr/string.rs b/crates/aiken-lang/src/parser/expr/string.rs index d1c07acf..f1361d64 100644 --- a/crates/aiken-lang/src/parser/expr/string.rs +++ b/crates/aiken-lang/src/parser/expr/string.rs @@ -3,11 +3,11 @@ use chumsky::prelude::*; use crate::{ ast, expr::UntypedExpr, - parser::{error::ParseError, token::Token}, + parser::{error::ParseError, literal::string::parser as string, token::Token}, }; pub fn parser() -> impl Parser { - select! {Token::String {value} => value}.map_with_span(|value, span| UntypedExpr::String { + string().map_with_span(|value, span| UntypedExpr::String { location: span, value, }) diff --git a/crates/aiken-lang/src/parser/literal/bytearray.rs b/crates/aiken-lang/src/parser/literal/bytearray.rs new file mode 100644 index 00000000..5516b981 --- /dev/null +++ b/crates/aiken-lang/src/parser/literal/bytearray.rs @@ -0,0 +1,87 @@ +use chumsky::prelude::*; + +use crate::{ + ast, + parser::{ + error::{self, ParseError}, + token::{Base, Token}, + }, +}; + +pub fn parser( + into: impl Fn(Vec, ast::ByteArrayFormatPreference, ast::Span) -> A, +) -> impl Parser { + choice((array_of_bytes(), hex_string(), utf8_string())) + .map_with_span(move |(preferred_format, bytes), span| into(bytes, preferred_format, span)) +} + +pub fn array_of_bytes( +) -> impl Parser), Error = ParseError> { + just(Token::Hash) + .ignore_then( + select! {Token::Int {value, base, ..} => (value, base)} + .validate(|(value, base), span, emit| { + let byte: u8 = match value.parse() { + Ok(b) => b, + Err(_) => { + emit(ParseError::expected_input_found( + span, + None, + Some(error::Pattern::Byte), + )); + 0 + } + }; + (byte, base) + }) + .separated_by(just(Token::Comma)) + .allow_trailing() + .delimited_by(just(Token::LeftSquare), just(Token::RightSquare)), + ) + .validate(|bytes, span, emit| { + let base = bytes.iter().fold(Ok(None), |acc, (_, base)| match acc { + Ok(None) => Ok(Some(base)), + Ok(Some(previous_base)) if previous_base == base => Ok(Some(base)), + _ => Err(()), + }); + + let base = match base { + Err(()) => { + emit(ParseError::hybrid_notation_in_bytearray(span)); + Base::Decimal { + numeric_underscore: false, + } + } + Ok(None) => Base::Decimal { + numeric_underscore: false, + }, + Ok(Some(base)) => *base, + }; + + (bytes.into_iter().map(|(b, _)| b).collect::>(), base) + }) + .map(|(bytes, base)| (ast::ByteArrayFormatPreference::ArrayOfBytes(base), bytes)) +} + +pub fn hex_string( +) -> impl Parser), Error = ParseError> { + just(Token::Hash) + .ignore_then( + select! {Token::ByteString {value} => value}.validate(|value, span, emit| { + match hex::decode(value) { + Ok(bytes) => bytes, + Err(_) => { + emit(ParseError::malformed_base16_string_literal(span)); + vec![] + } + } + }), + ) + .map(|token| (ast::ByteArrayFormatPreference::HexadecimalString, token)) +} + +pub fn utf8_string( +) -> impl Parser), Error = ParseError> { + select! {Token::ByteString {value} => value.into_bytes() } + .map(|token| (ast::ByteArrayFormatPreference::Utf8String, token)) +} diff --git a/crates/aiken-lang/src/parser/literal/int.rs b/crates/aiken-lang/src/parser/literal/int.rs new file mode 100644 index 00000000..05e6cdb1 --- /dev/null +++ b/crates/aiken-lang/src/parser/literal/int.rs @@ -0,0 +1,10 @@ +use chumsky::prelude::*; + +use crate::parser::{ + error::ParseError, + token::{Base, Token}, +}; + +pub fn parser() -> impl Parser { + select! { Token::Int {value, base} => (value, base)} +} diff --git a/crates/aiken-lang/src/parser/literal/mod.rs b/crates/aiken-lang/src/parser/literal/mod.rs new file mode 100644 index 00000000..6a67a32e --- /dev/null +++ b/crates/aiken-lang/src/parser/literal/mod.rs @@ -0,0 +1,3 @@ +pub(crate) mod bytearray; +pub(crate) mod int; +pub(crate) mod string; diff --git a/crates/aiken-lang/src/parser/literal/string.rs b/crates/aiken-lang/src/parser/literal/string.rs new file mode 100644 index 00000000..d1d88cf8 --- /dev/null +++ b/crates/aiken-lang/src/parser/literal/string.rs @@ -0,0 +1,7 @@ +use chumsky::prelude::*; + +use crate::parser::{error::ParseError, token::Token}; + +pub fn parser() -> impl Parser { + select! {Token::String {value} => value} +}