Extend the parser to support hexadecimal integer literals and numeric underscores.

We only allow numeric underscores for decimal numbers, as it is not yet clear how they should be defined for non-decimal numbers.
This commit is contained in:
KtorZ
2023-06-08 14:12:33 +02:00
parent 0ea11a4d13
commit 79a2174f0a
12 changed files with 446 additions and 40 deletions

View File

@@ -55,6 +55,26 @@ impl ParseError {
label: None,
}
}
/// Builds the [`ParseError`] used to report malformed hexadecimal digits.
///
/// The resulting error has kind [`ErrorKind::MalformedBase16Digits`] and is
/// anchored at `span`. It carries no `while_parsing` context, an empty set of
/// expected tokens, and no label.
pub fn malformed_base16_digits(span: Span) -> Self {
    let kind = ErrorKind::MalformedBase16Digits;
    let expected = HashSet::new();

    Self {
        kind,
        span,
        while_parsing: None,
        expected,
        label: None,
    }
}
/// Builds the [`ParseError`] used when a bytearray is declared using two
/// different notations.
///
/// The resulting error has kind [`ErrorKind::HybridNotationInByteArray`] and
/// is anchored at `span`. It carries no `while_parsing` context, an empty set
/// of expected tokens, and no label.
pub fn hybrid_notation_in_bytearray(span: Span) -> Self {
    let kind = ErrorKind::HybridNotationInByteArray;
    let expected = HashSet::new();

    Self {
        kind,
        span,
        while_parsing: None,
        expected,
        label: None,
    }
}
}
impl PartialEq for ParseError {
@@ -114,6 +134,12 @@ pub enum ErrorKind {
hint: Option<String>,
},
#[error("I tripped over a malformed hexadecimal digits.")]
#[diagnostic(help("{}", formatdoc! {
r#"When numbers starts with '0x', they are treated as hexadecimal numbers. Thus, only digits from 0-9 or letter from a-f (or A-F) can be used following a '0x' number declaration. Plus, hexadecimal digits always go by pairs, so the total number of digits must be even (not counting leading zeros)."#
}))]
MalformedBase16Digits,
#[error("I tripped over a malformed base16-encoded string literal.")]
#[diagnostic(help("{}", formatdoc! {
r#"You can declare literal bytearrays from base16-encoded (a.k.a. hexadecimal) string literals.
@@ -131,6 +157,11 @@ pub enum ErrorKind {
}))]
MalformedBase16StringLiteral,
#[error("I came across a bytearray declared using two different notations")]
#[diagnostic(url("https://aiken-lang.org/language-tour/primitive-types#bytearray"))]
#[diagnostic(help("Either use decimal or hexadecimal notation, but don't mix them."))]
HybridNotationInByteArray,
#[error("I failed to understand a when clause guard.")]
#[diagnostic(url("https://aiken-lang.org/language-tour/control-flow#checking-equality-and-ordering-in-patterns"))]
#[diagnostic(help("{}", formatdoc! {

View File

@@ -1,13 +1,58 @@
use chumsky::prelude::*;
use super::{
error::ParseError,
token::{Base, Token},
};
use crate::ast::Span;
use chumsky::prelude::*;
use num_bigint::BigInt;
use ordinal::Ordinal;
use super::{error::ParseError, token::Token};
pub fn lexer() -> impl Parser<char, Vec<(Token, Span)>, Error = ParseError> {
let int = text::int(10).map(|value| Token::Int { value });
let base10 = text::int(10).map(|value| Token::Int {
value,
base: Base::Decimal {
numeric_underscore: false,
},
});
let base10_underscore = one_of("0123456789")
.repeated()
.at_least(1)
.at_most(3)
.separated_by(just("_"))
.at_least(1)
.flatten()
.collect::<String>()
.map(|value| Token::Int {
value,
base: Base::Decimal {
numeric_underscore: true,
},
});
let base16 = just("0x")
.ignore_then(
one_of("0123456789abcdefABCDEF")
.repeated()
.at_least(2)
.collect::<String>(),
)
.validate(|value: String, span, emit| {
let value = match BigInt::parse_bytes(value.as_bytes(), 16) {
None => {
emit(ParseError::malformed_base16_digits(span));
String::new()
}
Some(n) => n.to_str_radix(10),
};
Token::Int {
value,
base: Base::Hexadecimal,
}
});
let int = choice((base16, base10_underscore, base10));
let ordinal = text::int(10)
.then_with(|index: String| {

View File

@@ -1,5 +1,11 @@
use std::fmt;
/// Notation in which an integer literal was written in the source.
///
/// The lexer attaches this to `Token::Int` alongside the literal's value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Base {
    /// A base-10 literal.
    Decimal {
        /// `true` when the lexer matched the underscore-separated digit-group
        /// form, `false` for a plain run of decimal digits.
        numeric_underscore: bool,
    },
    /// A literal introduced by the `0x` prefix.
    Hexadecimal,
}
#[derive(Clone, Debug, PartialEq, Hash, Eq)]
pub enum Token {
Error(char),
@@ -7,7 +13,7 @@ pub enum Token {
Ordinal { index: u32 },
UpName { name: String },
DiscardName { name: String },
Int { value: String },
Int { value: String, base: Base },
ByteString { value: String },
String { value: String },
// Groupings
@@ -97,7 +103,7 @@ impl fmt::Display for Token {
}
Token::UpName { name } => name,
Token::DiscardName { name } => name,
Token::Int { value } => value,
Token::Int { value, .. } => value,
Token::String { value } => value,
Token::ByteString { value } => value,
Token::NewLineLeftParen => "↳(",