From 66296df9c3886bab77d0a8d2281b2dbef19d3d57 Mon Sep 17 00:00:00 2001 From: KtorZ Date: Wed, 5 Jul 2023 14:36:23 +0200 Subject: [PATCH] Move parsing of literals under new 'literal' parser module group Also moved the logic for 'int' and 'string' there though it is trivial. Yet, for bytearray, it tidies things nicely by removing them from the 'utils' module. --- crates/aiken-lang/src/parser.rs | 1 + .../src/parser/definition/constant.rs | 6 +- .../aiken-lang/src/parser/expr/bytearray.rs | 89 +------------------ crates/aiken-lang/src/parser/expr/int.rs | 12 ++- crates/aiken-lang/src/parser/expr/string.rs | 4 +- .../src/parser/literal/bytearray.rs | 87 ++++++++++++++++++ crates/aiken-lang/src/parser/literal/int.rs | 10 +++ crates/aiken-lang/src/parser/literal/mod.rs | 3 + .../aiken-lang/src/parser/literal/string.rs | 7 ++ 9 files changed, 123 insertions(+), 96 deletions(-) create mode 100644 crates/aiken-lang/src/parser/literal/bytearray.rs create mode 100644 crates/aiken-lang/src/parser/literal/int.rs create mode 100644 crates/aiken-lang/src/parser/literal/mod.rs create mode 100644 crates/aiken-lang/src/parser/literal/string.rs diff --git a/crates/aiken-lang/src/parser.rs b/crates/aiken-lang/src/parser.rs index 78c1e87c..d4af7017 100644 --- a/crates/aiken-lang/src/parser.rs +++ b/crates/aiken-lang/src/parser.rs @@ -4,6 +4,7 @@ pub mod error; pub mod expr; pub mod extra; pub mod lexer; +pub mod literal; pub mod pattern; pub mod token; mod utils; diff --git a/crates/aiken-lang/src/parser/definition/constant.rs b/crates/aiken-lang/src/parser/definition/constant.rs index 8963707b..1e2629ed 100644 --- a/crates/aiken-lang/src/parser/definition/constant.rs +++ b/crates/aiken-lang/src/parser/definition/constant.rs @@ -2,7 +2,9 @@ use chumsky::prelude::*; use crate::{ ast, - parser::{annotation, error::ParseError, expr::bytearray::bytearray, token::Token, utils}, + parser::{ + annotation, error::ParseError, literal::bytearray::parser as bytearray, token::Token, utils, + }, }; pub fn parser() -> impl Parser { @@ -48,7 +50,7 @@ pub fn value() -> impl Parser { }); let constant_bytearray_parser = - bytearray().map_with_span(|(preferred_format, bytes), span| ast::Constant::ByteArray { + bytearray(|bytes, preferred_format, span| ast::Constant::ByteArray { location: span, bytes, preferred_format, diff --git a/crates/aiken-lang/src/parser/expr/bytearray.rs b/crates/aiken-lang/src/parser/expr/bytearray.rs index 0b46c4db..2829b8dc 100644 --- a/crates/aiken-lang/src/parser/expr/bytearray.rs +++ b/crates/aiken-lang/src/parser/expr/bytearray.rs @@ -1,98 +1,17 @@ use chumsky::prelude::*; -use crate::{ - ast, - expr::UntypedExpr, - parser::{ - error::{self, ParseError}, - token::{Base, Token}, - }, +use crate::parser::{ + error::ParseError, expr::UntypedExpr, literal::bytearray::parser as bytearray, token::Token, }; pub fn parser() -> impl Parser { - bytearray().map_with_span(|(preferred_format, bytes), span| UntypedExpr::ByteArray { - location: span, + bytearray(|bytes, preferred_format, location| UntypedExpr::ByteArray { + location, bytes, preferred_format, }) } -pub fn bytearray( -) -> impl Parser), Error = ParseError> { - choice((array_of_bytes(), hex_string(), utf8_string())) -} - -pub fn array_of_bytes( -) -> impl Parser), Error = ParseError> { - just(Token::Hash) - .ignore_then( - select! {Token::Int {value, base, ..} => (value, base)} - .validate(|(value, base), span, emit| { - let byte: u8 = match value.parse() { - Ok(b) => b, - Err(_) => { - emit(ParseError::expected_input_found( - span, - None, - Some(error::Pattern::Byte), - )); - 0 - } - }; - (byte, base) - }) - .separated_by(just(Token::Comma)) - .allow_trailing() - .delimited_by(just(Token::LeftSquare), just(Token::RightSquare)), - ) - .validate(|bytes, span, emit| { - let base = bytes.iter().fold(Ok(None), |acc, (_, base)| match acc { - Ok(None) => Ok(Some(base)), - Ok(Some(previous_base)) if previous_base == base => Ok(Some(base)), - _ => Err(()), - }); - - let base = match base { - Err(()) => { - emit(ParseError::hybrid_notation_in_bytearray(span)); - Base::Decimal { - numeric_underscore: false, - } - } - Ok(None) => Base::Decimal { - numeric_underscore: false, - }, - Ok(Some(base)) => *base, - }; - - (bytes.into_iter().map(|(b, _)| b).collect::>(), base) - }) - .map(|(bytes, base)| (ast::ByteArrayFormatPreference::ArrayOfBytes(base), bytes)) -} - -pub fn hex_string( -) -> impl Parser), Error = ParseError> { - just(Token::Hash) - .ignore_then( - select! {Token::ByteString {value} => value}.validate(|value, span, emit| { - match hex::decode(value) { - Ok(bytes) => bytes, - Err(_) => { - emit(ParseError::malformed_base16_string_literal(span)); - vec![] - } - } - }), - ) - .map(|token| (ast::ByteArrayFormatPreference::HexadecimalString, token)) -} - -pub fn utf8_string( -) -> impl Parser), Error = ParseError> { - select! {Token::ByteString {value} => value.into_bytes() } - .map(|token| (ast::ByteArrayFormatPreference::Utf8String, token)) -} - #[cfg(test)] mod tests { use crate::assert_expr; diff --git a/crates/aiken-lang/src/parser/expr/int.rs b/crates/aiken-lang/src/parser/expr/int.rs index 6e4f628c..5ed56f13 100644 --- a/crates/aiken-lang/src/parser/expr/int.rs +++ b/crates/aiken-lang/src/parser/expr/int.rs @@ -2,16 +2,14 @@ use chumsky::prelude::*; use crate::{ expr::UntypedExpr, - parser::{error::ParseError, token::Token}, + parser::{error::ParseError, literal::int::parser as int, token::Token}, }; pub fn parser() -> impl Parser { - select! { Token::Int {value, base} => (value, base)}.map_with_span(|(value, base), span| { - UntypedExpr::Int { - location: span, - value, - base, - } + int().map_with_span(|(value, base), span| UntypedExpr::Int { + location: span, + value, + base, }) } diff --git a/crates/aiken-lang/src/parser/expr/string.rs b/crates/aiken-lang/src/parser/expr/string.rs index d1c07acf..f1361d64 100644 --- a/crates/aiken-lang/src/parser/expr/string.rs +++ b/crates/aiken-lang/src/parser/expr/string.rs @@ -3,11 +3,11 @@ use chumsky::prelude::*; use crate::{ ast, expr::UntypedExpr, - parser::{error::ParseError, token::Token}, + parser::{error::ParseError, literal::string::parser as string, token::Token}, }; pub fn parser() -> impl Parser { - select! {Token::String {value} => value}.map_with_span(|value, span| UntypedExpr::String { + string().map_with_span(|value, span| UntypedExpr::String { location: span, value, }) diff --git a/crates/aiken-lang/src/parser/literal/bytearray.rs b/crates/aiken-lang/src/parser/literal/bytearray.rs new file mode 100644 index 00000000..5516b981 --- /dev/null +++ b/crates/aiken-lang/src/parser/literal/bytearray.rs @@ -0,0 +1,87 @@ +use chumsky::prelude::*; + +use crate::{ + ast, + parser::{ + error::{self, ParseError}, + token::{Base, Token}, + }, +}; + +pub fn parser( + into: impl Fn(Vec, ast::ByteArrayFormatPreference, ast::Span) -> A, +) -> impl Parser { + choice((array_of_bytes(), hex_string(), utf8_string())) + .map_with_span(move |(preferred_format, bytes), span| into(bytes, preferred_format, span)) +} + +pub fn array_of_bytes( +) -> impl Parser), Error = ParseError> { + just(Token::Hash) + .ignore_then( + select! {Token::Int {value, base, ..} => (value, base)} + .validate(|(value, base), span, emit| { + let byte: u8 = match value.parse() { + Ok(b) => b, + Err(_) => { + emit(ParseError::expected_input_found( + span, + None, + Some(error::Pattern::Byte), + )); + 0 + } + }; + (byte, base) + }) + .separated_by(just(Token::Comma)) + .allow_trailing() + .delimited_by(just(Token::LeftSquare), just(Token::RightSquare)), + ) + .validate(|bytes, span, emit| { + let base = bytes.iter().fold(Ok(None), |acc, (_, base)| match acc { + Ok(None) => Ok(Some(base)), + Ok(Some(previous_base)) if previous_base == base => Ok(Some(base)), + _ => Err(()), + }); + + let base = match base { + Err(()) => { + emit(ParseError::hybrid_notation_in_bytearray(span)); + Base::Decimal { + numeric_underscore: false, + } + } + Ok(None) => Base::Decimal { + numeric_underscore: false, + }, + Ok(Some(base)) => *base, + }; + + (bytes.into_iter().map(|(b, _)| b).collect::>(), base) + }) + .map(|(bytes, base)| (ast::ByteArrayFormatPreference::ArrayOfBytes(base), bytes)) +} + +pub fn hex_string( +) -> impl Parser), Error = ParseError> { + just(Token::Hash) + .ignore_then( + select! {Token::ByteString {value} => value}.validate(|value, span, emit| { + match hex::decode(value) { + Ok(bytes) => bytes, + Err(_) => { + emit(ParseError::malformed_base16_string_literal(span)); + vec![] + } + } + }), + ) + .map(|token| (ast::ByteArrayFormatPreference::HexadecimalString, token)) +} + +pub fn utf8_string( +) -> impl Parser), Error = ParseError> { + select! {Token::ByteString {value} => value.into_bytes() } + .map(|token| (ast::ByteArrayFormatPreference::Utf8String, token)) +} diff --git a/crates/aiken-lang/src/parser/literal/int.rs b/crates/aiken-lang/src/parser/literal/int.rs new file mode 100644 index 00000000..05e6cdb1 --- /dev/null +++ b/crates/aiken-lang/src/parser/literal/int.rs @@ -0,0 +1,10 @@ +use chumsky::prelude::*; + +use crate::parser::{ + error::ParseError, + token::{Base, Token}, +}; + +pub fn parser() -> impl Parser { + select! { Token::Int {value, base} => (value, base)} +} diff --git a/crates/aiken-lang/src/parser/literal/mod.rs b/crates/aiken-lang/src/parser/literal/mod.rs new file mode 100644 index 00000000..6a67a32e --- /dev/null +++ b/crates/aiken-lang/src/parser/literal/mod.rs @@ -0,0 +1,3 @@ +pub(crate) mod bytearray; +pub(crate) mod int; +pub(crate) mod string; diff --git a/crates/aiken-lang/src/parser/literal/string.rs b/crates/aiken-lang/src/parser/literal/string.rs new file mode 100644 index 00000000..d1d88cf8 --- /dev/null +++ b/crates/aiken-lang/src/parser/literal/string.rs @@ -0,0 +1,7 @@ +use chumsky::prelude::*; + +use crate::parser::{error::ParseError, token::Token}; + +pub fn parser() -> impl Parser { + select! {Token::String {value} => value} +}