feat: move some token processing to the lexer

Author: rvcas, 2023-06-30 14:57:41 -04:00
Parent: 2226747dc1
Commit: f878ef7cef
GPG Key ID: C09B64E263F7D68C (no known key found for this signature in database)
5 changed files with 91 additions and 87 deletions

View File

@@ -1167,6 +1167,12 @@ impl Span {
         Self::new((), 0..0)
     }
 
+    pub fn create(i: usize, n: usize) -> Self {
+        use chumsky::Span;
+
+        Self::new((), i..i + n)
+    }
+
     pub fn range(&self) -> Range<usize> {
         use chumsky::Span;
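
For reference, a minimal sketch of how the new Span::create helper relates to the existing constructor. The aiken_lang::ast path and the assertion are illustrative assumptions, not part of this diff.

    // Illustrative only: Span::create(i, n) covers n bytes starting at byte
    // offset i, i.e. the half-open range i..i + n.
    use aiken_lang::ast::Span;

    fn span_create_demo() {
        // Assumed equivalent to Span::new((), 3..7), per the hunk above.
        let s = Span::create(3, 4);
        assert_eq!(s.range(), 3..7);
    }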

View File

@@ -13,78 +13,20 @@ pub use definitions::parser as definitions;
 pub use expr::parser as expression;
 pub use pattern::parser as pattern;
 
-use crate::ast::{self, Span};
-use chumsky::{chain::Chain, prelude::*};
+use crate::ast;
+use chumsky::prelude::*;
 use error::ParseError;
 use extra::ModuleExtra;
-use token::Token;
 
 pub fn module(
     src: &str,
     kind: ast::ModuleKind,
 ) -> Result<(ast::UntypedModule, ModuleExtra), Vec<ParseError>> {
-    let len = src.as_bytes().len();
-
-    let span = |i, n| Span::new((), i..i + n);
-
-    let tokens = lexer::lexer().parse(chumsky::Stream::from_iter(
-        span(len, 1),
-        src.chars().scan(0, |i, c| {
-            let start = *i;
-            let offset = c.len_utf8();
-            *i = start + offset;
-            Some((c, span(start, offset)))
-        }),
-    ))?;
-
-    let mut extra = ModuleExtra::new();
-
-    let mut previous_is_newline = false;
-
-    let tokens = tokens.into_iter().filter_map(|(token, ref span)| {
-        let current_is_newline = token == Token::NewLine || token == Token::EmptyLine;
-
-        let result = match token {
-            Token::ModuleComment => {
-                extra.module_comments.push(*span);
-                None
-            }
-            Token::DocComment => {
-                extra.doc_comments.push(*span);
-                None
-            }
-            Token::Comment => {
-                extra.comments.push(*span);
-                None
-            }
-            Token::EmptyLine => {
-                extra.empty_lines.push(span.start);
-                None
-            }
-            Token::LeftParen => {
-                if previous_is_newline {
-                    Some((Token::NewLineLeftParen, *span))
-                } else {
-                    Some((Token::LeftParen, *span))
-                }
-            }
-            Token::Pipe => {
-                if previous_is_newline {
-                    Some((Token::NewLinePipe, *span))
-                } else {
-                    Some((Token::Pipe, *span))
-                }
-            }
-            Token::NewLine => None,
-            _ => Some((token, *span)),
-        };
-
-        previous_is_newline = current_is_newline;
-
-        result
-    });
-
-    let definitions =
-        definitions().parse(chumsky::Stream::from_iter(span(tokens.len(), 1), tokens))?;
+    let lexer::LexInfo { tokens, extra } = lexer::run(src)?;
+
+    let stream = chumsky::Stream::from_iter(ast::Span::create(tokens.len(), 1), tokens.into_iter());
+
+    let definitions = definitions().parse(stream)?;
 
     let module = ast::UntypedModule {
         kind,
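
For orientation, a hedged sketch of how the slimmed-down entry point is driven after this change. The aiken_lang crate path and the ast::ModuleKind::Lib variant are assumptions, not shown in this diff.

    use aiken_lang::{ast, parser};

    // Illustrative caller: lexing, comment capture, and newline rewriting now
    // happen inside lexer::run, so module() only wires the token stream into
    // the definition parser.
    fn parses_as_lib(src: &str) -> bool {
        parser::module(src, ast::ModuleKind::Lib).is_ok()
    }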

View File

@@ -1,12 +1,84 @@
-use super::{
-    error::ParseError,
-    token::{Base, Token},
-};
-use crate::ast::Span;
 use chumsky::prelude::*;
 use num_bigint::BigInt;
 use ordinal::Ordinal;
 
+use super::{
+    error::ParseError,
+    extra::ModuleExtra,
+    token::{Base, Token},
+};
+use crate::ast::Span;
+
+pub struct LexInfo {
+    pub tokens: Vec<(Token, Span)>,
+    pub extra: ModuleExtra,
+}
+
+pub fn run(src: &str) -> Result<LexInfo, Vec<ParseError>> {
+    let len = src.as_bytes().len();
+
+    let tokens = lexer().parse(chumsky::Stream::from_iter(
+        Span::create(len, 1),
+        src.chars().scan(0, |i, c| {
+            let start = *i;
+            let offset = c.len_utf8();
+            *i = start + offset;
+            Some((c, Span::create(start, offset)))
+        }),
+    ))?;
+
+    let mut extra = ModuleExtra::new();
+
+    let mut previous_is_newline = false;
+
+    let tokens = tokens
+        .into_iter()
+        .filter_map(|(token, ref span)| {
+            let current_is_newline = token == Token::NewLine || token == Token::EmptyLine;
+
+            let result = match token {
+                Token::ModuleComment => {
+                    extra.module_comments.push(*span);
+                    None
+                }
+                Token::DocComment => {
+                    extra.doc_comments.push(*span);
+                    None
+                }
+                Token::Comment => {
+                    extra.comments.push(*span);
+                    None
+                }
+                Token::EmptyLine => {
+                    extra.empty_lines.push(span.start);
+                    None
+                }
+                Token::LeftParen => {
+                    if previous_is_newline {
+                        Some((Token::NewLineLeftParen, *span))
+                    } else {
+                        Some((Token::LeftParen, *span))
+                    }
+                }
+                Token::Pipe => {
+                    if previous_is_newline {
+                        Some((Token::NewLinePipe, *span))
+                    } else {
+                        Some((Token::Pipe, *span))
+                    }
+                }
+                Token::NewLine => None,
+                _ => Some((token, *span)),
+            };
+
+            previous_is_newline = current_is_newline;
+
+            result
+        })
+        .collect::<Vec<(Token, Span)>>();
+
+    Ok(LexInfo { tokens, extra })
+}
+
 pub fn lexer() -> impl Parser<char, Vec<(Token, Span)>, Error = ParseError> {
     let base10 = text::int(10).map(|value| Token::Int {
         value,
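
To make the relocated behaviour concrete, a hedged sketch of calling the new lexer entry point directly. The module paths are assumptions; the token handling described comes from the filter_map in the hunk above.

    use aiken_lang::parser::{lexer, token::Token};

    // Illustrative only: run() lexes the source, records comments, doc comments,
    // and empty lines into ModuleExtra, and drops or rewrites newline-sensitive
    // tokens before the parser ever sees them.
    fn lex_demo(src: &str) {
        let Ok(lexer::LexInfo { tokens, extra }) = lexer::run(src) else {
            return;
        };

        // Comment tokens never reach the token stream; only their spans survive.
        assert!(tokens.iter().all(|(token, _)| *token != Token::Comment));

        // Blank-line positions remain available (e.g. for the formatter) via extra.
        let _ = extra.empty_lines;
    }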

View File

@@ -23,7 +23,7 @@ fn assert_definitions(code: &str, definitions: Vec<ast::UntypedDefinition>) {
     )
 }
 
-macro_rules! snapshot_test {
+macro_rules! assert_parse {
     ($name:ident, $code:expr) => {
         #[test]
         fn $name() {
@@ -41,23 +41,7 @@ macro_rules! snapshot_test {
     };
 }
 
-snapshot_test!(snapshot_windows_newline, "use aiken/list\r\n");
-
-#[test]
-fn windows_newline() {
-    let code = "use aiken/list\r\n";
-
-    assert_definitions(
-        code,
-        vec![ast::UntypedDefinition::Use(Use {
-            location: Span::new((), 0..14),
-            module: vec!["aiken".to_string(), "list".to_string()],
-            as_name: None,
-            unqualified: vec![],
-            package: (),
-        })],
-    )
-}
+assert_parse!(windows_newline, "use aiken/list\r\n");
 
 #[test]
 fn can_handle_comments_at_end_of_file() {

View File

@@ -1,7 +1,7 @@
 ---
 source: crates/aiken-lang/src/tests/parser.rs
 description: "use aiken/list\r\n"
-info: snapshot_windows_newline
+info: windows_newline
 ---
 Module {
     name: "",