Extend the parser to support hexadecimal integer literals and numeric underscores.
Numeric underscores are only allowed in decimal numbers for now, as it is not yet clear how they should be defined for non-decimal numbers.
This commit is contained in:
@@ -55,6 +55,26 @@ impl ParseError {
|
||||
label: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn malformed_base16_digits(span: Span) -> Self {
|
||||
Self {
|
||||
kind: ErrorKind::MalformedBase16Digits,
|
||||
span,
|
||||
while_parsing: None,
|
||||
expected: HashSet::new(),
|
||||
label: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn hybrid_notation_in_bytearray(span: Span) -> Self {
|
||||
Self {
|
||||
kind: ErrorKind::HybridNotationInByteArray,
|
||||
span,
|
||||
while_parsing: None,
|
||||
expected: HashSet::new(),
|
||||
label: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for ParseError {
|
||||
@@ -114,6 +134,12 @@ pub enum ErrorKind {
|
||||
hint: Option<String>,
|
||||
},
|
||||
|
||||
#[error("I tripped over a malformed hexadecimal digits.")]
|
||||
#[diagnostic(help("{}", formatdoc! {
|
||||
r#"When numbers starts with '0x', they are treated as hexadecimal numbers. Thus, only digits from 0-9 or letter from a-f (or A-F) can be used following a '0x' number declaration. Plus, hexadecimal digits always go by pairs, so the total number of digits must be even (not counting leading zeros)."#
|
||||
}))]
|
||||
MalformedBase16Digits,
|
||||
|
||||
#[error("I tripped over a malformed base16-encoded string literal.")]
|
||||
#[diagnostic(help("{}", formatdoc! {
|
||||
r#"You can declare literal bytearrays from base16-encoded (a.k.a. hexadecimal) string literals.
|
||||
@@ -131,6 +157,11 @@ pub enum ErrorKind {
|
||||
}))]
|
||||
MalformedBase16StringLiteral,
|
||||
|
||||
#[error("I came across a bytearray declared using two different notations")]
|
||||
#[diagnostic(url("https://aiken-lang.org/language-tour/primitive-types#bytearray"))]
|
||||
#[diagnostic(help("Either use decimal or hexadecimal notation, but don't mix them."))]
|
||||
HybridNotationInByteArray,
|
||||
|
||||
#[error("I failed to understand a when clause guard.")]
|
||||
#[diagnostic(url("https://aiken-lang.org/language-tour/control-flow#checking-equality-and-ordering-in-patterns"))]
|
||||
#[diagnostic(help("{}", formatdoc! {
|
||||
|
||||
@@ -1,13 +1,58 @@
|
||||
use chumsky::prelude::*;
|
||||
|
||||
use super::{
|
||||
error::ParseError,
|
||||
token::{Base, Token},
|
||||
};
|
||||
use crate::ast::Span;
|
||||
|
||||
use chumsky::prelude::*;
|
||||
use num_bigint::BigInt;
|
||||
use ordinal::Ordinal;
|
||||
|
||||
use super::{error::ParseError, token::Token};
|
||||
|
||||
pub fn lexer() -> impl Parser<char, Vec<(Token, Span)>, Error = ParseError> {
|
||||
let int = text::int(10).map(|value| Token::Int { value });
|
||||
let base10 = text::int(10).map(|value| Token::Int {
|
||||
value,
|
||||
base: Base::Decimal {
|
||||
numeric_underscore: false,
|
||||
},
|
||||
});
|
||||
|
||||
let base10_underscore = one_of("0123456789")
|
||||
.repeated()
|
||||
.at_least(1)
|
||||
.at_most(3)
|
||||
.separated_by(just("_"))
|
||||
.at_least(1)
|
||||
.flatten()
|
||||
.collect::<String>()
|
||||
.map(|value| Token::Int {
|
||||
value,
|
||||
base: Base::Decimal {
|
||||
numeric_underscore: true,
|
||||
},
|
||||
});
|
||||
|
||||
let base16 = just("0x")
|
||||
.ignore_then(
|
||||
one_of("0123456789abcdefABCDEF")
|
||||
.repeated()
|
||||
.at_least(2)
|
||||
.collect::<String>(),
|
||||
)
|
||||
.validate(|value: String, span, emit| {
|
||||
let value = match BigInt::parse_bytes(value.as_bytes(), 16) {
|
||||
None => {
|
||||
emit(ParseError::malformed_base16_digits(span));
|
||||
String::new()
|
||||
}
|
||||
Some(n) => n.to_str_radix(10),
|
||||
};
|
||||
|
||||
Token::Int {
|
||||
value,
|
||||
base: Base::Hexadecimal,
|
||||
}
|
||||
});
|
||||
|
||||
let int = choice((base16, base10_underscore, base10));
|
||||
|
||||
let ordinal = text::int(10)
|
||||
.then_with(|index: String| {
|
||||
|
||||
@@ -1,5 +1,11 @@
|
||||
use std::fmt;
|
||||
|
||||
/// Notation in which an integer literal was written in the source.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Base {
    /// A decimal literal.
    Decimal {
        /// Flag recording whether the literal was lexed by the
        /// underscore-aware decimal rule.
        numeric_underscore: bool,
    },
    /// A hexadecimal literal.
    Hexadecimal,
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Hash, Eq)]
|
||||
pub enum Token {
|
||||
Error(char),
|
||||
@@ -7,7 +13,7 @@ pub enum Token {
|
||||
Ordinal { index: u32 },
|
||||
UpName { name: String },
|
||||
DiscardName { name: String },
|
||||
Int { value: String },
|
||||
Int { value: String, base: Base },
|
||||
ByteString { value: String },
|
||||
String { value: String },
|
||||
// Groupings
|
||||
@@ -97,7 +103,7 @@ impl fmt::Display for Token {
|
||||
}
|
||||
Token::UpName { name } => name,
|
||||
Token::DiscardName { name } => name,
|
||||
Token::Int { value } => value,
|
||||
Token::Int { value, .. } => value,
|
||||
Token::String { value } => value,
|
||||
Token::ByteString { value } => value,
|
||||
Token::NewLineLeftParen => "↳(",
|
||||
|
||||
Reference in New Issue
Block a user