diff --git a/crates/aiken-lang/src/ast.rs b/crates/aiken-lang/src/ast.rs index 9f98f97c..a2b1ad89 100644 --- a/crates/aiken-lang/src/ast.rs +++ b/crates/aiken-lang/src/ast.rs @@ -1167,6 +1167,12 @@ impl Span { Self::new((), 0..0) } + pub fn create(i: usize, n: usize) -> Self { + use chumsky::Span; + + Self::new((), i..i + n) + } + pub fn range(&self) -> Range<usize> { use chumsky::Span; diff --git a/crates/aiken-lang/src/parser.rs b/crates/aiken-lang/src/parser.rs index 2a7b628d..f384de20 100644 --- a/crates/aiken-lang/src/parser.rs +++ b/crates/aiken-lang/src/parser.rs @@ -13,78 +13,20 @@ pub use definitions::parser as definitions; pub use expr::parser as expression; pub use pattern::parser as pattern; -use crate::ast::{self, Span}; -use chumsky::{chain::Chain, prelude::*}; +use crate::ast; +use chumsky::prelude::*; use error::ParseError; use extra::ModuleExtra; -use token::Token; pub fn module( src: &str, kind: ast::ModuleKind, ) -> Result<(ast::UntypedModule, ModuleExtra), Vec<ParseError>> { - let len = src.as_bytes().len(); + let lexer::LexInfo { tokens, extra } = lexer::run(src)?; - let span = |i, n| Span::new((), i..i + n); + let stream = chumsky::Stream::from_iter(ast::Span::create(tokens.len(), 1), tokens.into_iter()); - let tokens = lexer::lexer().parse(chumsky::Stream::from_iter( - span(len, 1), - src.chars().scan(0, |i, c| { - let start = *i; - let offset = c.len_utf8(); - *i = start + offset; - Some((c, span(start, offset))) - }), - ))?; - - let mut extra = ModuleExtra::new(); - - let mut previous_is_newline = false; - - let tokens = tokens.into_iter().filter_map(|(token, ref span)| { - let current_is_newline = token == Token::NewLine || token == Token::EmptyLine; - let result = match token { - Token::ModuleComment => { - extra.module_comments.push(*span); - None - } - Token::DocComment => { - extra.doc_comments.push(*span); - None - } - Token::Comment => { - extra.comments.push(*span); - None - } - Token::EmptyLine => { - extra.empty_lines.push(span.start); - None - } - 
Token::LeftParen => { - if previous_is_newline { - Some((Token::NewLineLeftParen, *span)) - } else { - Some((Token::LeftParen, *span)) - } - } - Token::Pipe => { - if previous_is_newline { - Some((Token::NewLinePipe, *span)) - } else { - Some((Token::Pipe, *span)) - } - } - Token::NewLine => None, - _ => Some((token, *span)), - }; - - previous_is_newline = current_is_newline; - - result - }); - - let definitions = - definitions().parse(chumsky::Stream::from_iter(span(tokens.len(), 1), tokens))?; + let definitions = definitions().parse(stream)?; let module = ast::UntypedModule { kind, diff --git a/crates/aiken-lang/src/parser/lexer.rs b/crates/aiken-lang/src/parser/lexer.rs index 110d0335..f81e8207 100644 --- a/crates/aiken-lang/src/parser/lexer.rs +++ b/crates/aiken-lang/src/parser/lexer.rs @@ -1,12 +1,84 @@ -use super::{ - error::ParseError, - token::{Base, Token}, -}; -use crate::ast::Span; use chumsky::prelude::*; use num_bigint::BigInt; use ordinal::Ordinal; +use super::{ + error::ParseError, + extra::ModuleExtra, + token::{Base, Token}, +}; +use crate::ast::Span; + +pub struct LexInfo { + pub tokens: Vec<(Token, Span)>, + pub extra: ModuleExtra, +} + +pub fn run(src: &str) -> Result<LexInfo, Vec<ParseError>> { + let len = src.as_bytes().len(); + + let tokens = lexer().parse(chumsky::Stream::from_iter( + Span::create(len, 1), + src.chars().scan(0, |i, c| { + let start = *i; + let offset = c.len_utf8(); + *i = start + offset; + Some((c, Span::create(start, offset))) + }), + ))?; + + let mut extra = ModuleExtra::new(); + + let mut previous_is_newline = false; + + let tokens = tokens + .into_iter() + .filter_map(|(token, ref span)| { + let current_is_newline = token == Token::NewLine || token == Token::EmptyLine; + let result = match token { + Token::ModuleComment => { + extra.module_comments.push(*span); + None + } + Token::DocComment => { + extra.doc_comments.push(*span); + None + } + Token::Comment => { + extra.comments.push(*span); + None + } + Token::EmptyLine => { + 
extra.empty_lines.push(span.start); + None + } + Token::LeftParen => { + if previous_is_newline { + Some((Token::NewLineLeftParen, *span)) + } else { + Some((Token::LeftParen, *span)) + } + } + Token::Pipe => { + if previous_is_newline { + Some((Token::NewLinePipe, *span)) + } else { + Some((Token::Pipe, *span)) + } + } + Token::NewLine => None, + _ => Some((token, *span)), + }; + + previous_is_newline = current_is_newline; + + result + }) + .collect::<Vec<(Token, Span)>>(); + + Ok(LexInfo { tokens, extra }) +} + pub fn lexer() -> impl Parser<char, Vec<(Token, Span)>, Error = ParseError> { let base10 = text::int(10).map(|value| Token::Int { value, diff --git a/crates/aiken-lang/src/tests/parser.rs b/crates/aiken-lang/src/tests/parser.rs index 5a4f174d..1eb01af7 100644 --- a/crates/aiken-lang/src/tests/parser.rs +++ b/crates/aiken-lang/src/tests/parser.rs @@ -23,7 +23,7 @@ fn assert_definitions(code: &str, definitions: Vec<ast::UntypedDefinition>) { ) } -macro_rules! snapshot_test { +macro_rules! assert_parse { ($name:ident, $code:expr) => { #[test] fn $name() { @@ -41,23 +41,7 @@ macro_rules! 
snapshot_test { }; } -snapshot_test!(snapshot_windows_newline, "use aiken/list\r\n"); - -#[test] -fn windows_newline() { - let code = "use aiken/list\r\n"; - - assert_definitions( - code, - vec![ast::UntypedDefinition::Use(Use { - location: Span::new((), 0..14), - module: vec!["aiken".to_string(), "list".to_string()], - as_name: None, - unqualified: vec![], - package: (), - })], - ) -} +assert_parse!(windows_newline, "use aiken/list\r\n"); #[test] fn can_handle_comments_at_end_of_file() { diff --git a/crates/aiken-lang/src/tests/snapshots/aiken_lang__tests__parser__snapshot_windows_newline.snap b/crates/aiken-lang/src/tests/snapshots/aiken_lang__tests__parser__windows_newline.snap similarity index 93% rename from crates/aiken-lang/src/tests/snapshots/aiken_lang__tests__parser__snapshot_windows_newline.snap rename to crates/aiken-lang/src/tests/snapshots/aiken_lang__tests__parser__windows_newline.snap index b6e2b5b1..f8b6b321 100644 --- a/crates/aiken-lang/src/tests/snapshots/aiken_lang__tests__parser__snapshot_windows_newline.snap +++ b/crates/aiken-lang/src/tests/snapshots/aiken_lang__tests__parser__windows_newline.snap @@ -1,7 +1,7 @@ --- source: crates/aiken-lang/src/tests/parser.rs description: "use aiken/list\r\n" -info: snapshot_windows_newline +info: windows_newline --- Module { name: "",