Use byte count for token span in the lexer.
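Somehow, miette doesn't play well with spans expressed as char indices: the reported positions drift as soon as the source contains a multi-byte UTF-8 character (e.g. `★`). So we have to count the number of bytes in strings / chars instead, so that spans align accordingly.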
This commit is contained in:
parent
67c072a1a9
commit
5a6cc855e6
|
@ -20,13 +20,18 @@ pub fn module(
|
|||
src: &str,
|
||||
kind: ast::ModuleKind,
|
||||
) -> Result<(ast::UntypedModule, ModuleExtra), Vec<ParseError>> {
|
||||
let len = src.chars().count();
|
||||
let len = src.as_bytes().len();
|
||||
|
||||
let span = |i| Span::new((), i..i + 1);
|
||||
let span = |i, n| Span::new((), i..i + n);
|
||||
|
||||
let tokens = lexer::lexer().parse(chumsky::Stream::from_iter(
|
||||
span(len),
|
||||
src.chars().enumerate().map(|(i, c)| (c, span(i))),
|
||||
span(len, 1),
|
||||
src.chars().scan(0, |i, c| {
|
||||
let start = *i;
|
||||
let offset = c.len_utf8();
|
||||
*i = start + offset;
|
||||
Some((c, span(start, offset)))
|
||||
}),
|
||||
))?;
|
||||
|
||||
let mut extra = ModuleExtra::new();
|
||||
|
@ -74,7 +79,7 @@ pub fn module(
|
|||
});
|
||||
|
||||
let definitions =
|
||||
module_parser().parse(chumsky::Stream::from_iter(span(tokens.len()), tokens))?;
|
||||
module_parser().parse(chumsky::Stream::from_iter(span(tokens.len(), 1), tokens))?;
|
||||
|
||||
let module = ast::UntypedModule {
|
||||
kind,
|
||||
|
|
|
@ -23,16 +23,12 @@ pub struct Comment<'a> {
|
|||
|
||||
// Builds a `Comment` from a span and the text that span indexes into.
// NOTE(review): this hunk (-23,16 +23,12) shows the pre-commit and the
// post-commit bodies interleaved; the page dropped the +/- markers, which is
// why `start`, `end` and both struct fields appear twice below.
impl<'a> From<(&Span, &'a str)> for Comment<'a> {
|
||||
fn from(src: (&Span, &'a str)) -> Comment<'a> {
|
||||
// NOTE(review): presumably the removed version. Maps the i-th char of `s` to
// its byte offset, falling back to `i` itself when `s` has fewer than i+1 chars.
fn char_indice(s: &str, i: usize) -> usize {
|
||||
s.char_indices().nth(i).unwrap_or((i, ' ')).0
|
||||
}
|
||||
|
||||
let start = char_indice(src.1, src.0.start);
|
||||
let end = char_indice(src.1, src.0.end);
|
||||
|
||||
// NOTE(review): presumably the added version. The span bounds are used
// directly as byte offsets, with no char-to-byte conversion.
let start = src.0.start;
|
||||
let end = src.0.end;
|
||||
Comment {
|
||||
start: src.0.start,
|
||||
content: src.1.get(start..end).expect("From span to comment"),
|
||||
start,
|
||||
// NOTE(review): slicing `as_bytes()[start..end]` panics on an out-of-range
// span before `expect` is reached. `str::get(start..end)` also takes a byte
// range and returns `None` instead; confirm whether it could be kept here.
content: std::str::from_utf8(src.1.as_bytes()[start..end].as_ref())
|
||||
.expect("From span to comment"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4796,3 +4796,103 @@ fn first_class_binop() {
|
|||
})],
|
||||
);
|
||||
}
|
||||
|
||||
// Checks that parser spans are byte offsets when the source contains a
// multi-byte character: the literal `"★"` is expected to produce the bytes
// [226, 152, 133] with a five-byte span (21..26), and the trailing `x` is
// expected at 29..30.
#[test]
|
||||
fn parse_unicode_offset_1() {
|
||||
use expr::UntypedExpr::*;
|
||||
|
||||
// NOTE(review): this rendering has lost the indentation inside the raw string.
// The expected offsets (body starting at 13, right after the 11-byte first
// line) imply the body lines are indented by two spaces; confirm in the file.
let code = indoc! {r#"
|
||||
fn foo() {
|
||||
let x = "★"
|
||||
x
|
||||
}
|
||||
"#};
|
||||
|
||||
assert_definitions(
|
||||
code,
|
||||
vec![ast::Definition::Fn(Function {
|
||||
arguments: vec![],
|
||||
body: Sequence {
|
||||
location: Span::new((), 13..30),
|
||||
expressions: vec![
|
||||
Assignment {
|
||||
location: Span::new((), 13..26),
|
||||
value: Box::new(ByteArray {
|
||||
// Five bytes wide: two quote bytes plus the three UTF-8 bytes of `★`.
location: Span::new((), 21..26),
|
||||
bytes: vec![226, 152, 133],
|
||||
preferred_format: ast::ByteArrayFormatPreference::Utf8String,
|
||||
}),
|
||||
pattern: ast::Pattern::Var {
|
||||
location: Span::new((), 17..18),
|
||||
name: "x".to_string(),
|
||||
},
|
||||
kind: ast::AssignmentKind::Let,
|
||||
annotation: None,
|
||||
},
|
||||
Var {
|
||||
// Everything after the literal is offset by its byte length, not its char count.
location: Span::new((), 29..30),
|
||||
name: "x".to_string(),
|
||||
},
|
||||
],
|
||||
},
|
||||
doc: None,
|
||||
location: Span::new((), 0..8),
|
||||
name: "foo".to_string(),
|
||||
public: false,
|
||||
return_annotation: None,
|
||||
return_type: (),
|
||||
end_position: 31,
|
||||
can_error: true,
|
||||
})],
|
||||
)
|
||||
}
|
||||
|
||||
// ASCII counterpart of `parse_unicode_offset_1`: the same program with the
// single-byte literal `"*"`. The string span is expected at 21..24 and the
// trailing `x` at 27..28, i.e. two bytes earlier than in the `★` case.
#[test]
|
||||
fn parse_unicode_offset_2() {
|
||||
use expr::UntypedExpr::*;
|
||||
|
||||
// NOTE(review): this rendering has lost the indentation inside the raw string.
// The expected offsets (body starting at 13, right after the 11-byte first
// line) imply the body lines are indented by two spaces; confirm in the file.
let code = indoc! {r#"
|
||||
fn foo() {
|
||||
let x = "*"
|
||||
x
|
||||
}
|
||||
"#};
|
||||
|
||||
assert_definitions(
|
||||
code,
|
||||
vec![ast::Definition::Fn(Function {
|
||||
arguments: vec![],
|
||||
body: Sequence {
|
||||
location: Span::new((), 13..28),
|
||||
expressions: vec![
|
||||
Assignment {
|
||||
location: Span::new((), 13..24),
|
||||
value: Box::new(ByteArray {
|
||||
// Three bytes wide: two quote bytes plus the single byte of `*` (42).
location: Span::new((), 21..24),
|
||||
bytes: vec![42],
|
||||
preferred_format: ast::ByteArrayFormatPreference::Utf8String,
|
||||
}),
|
||||
pattern: ast::Pattern::Var {
|
||||
location: Span::new((), 17..18),
|
||||
name: "x".to_string(),
|
||||
},
|
||||
kind: ast::AssignmentKind::Let,
|
||||
annotation: None,
|
||||
},
|
||||
Var {
|
||||
location: Span::new((), 27..28),
|
||||
name: "x".to_string(),
|
||||
},
|
||||
],
|
||||
},
|
||||
doc: None,
|
||||
location: Span::new((), 0..8),
|
||||
name: "foo".to_string(),
|
||||
public: false,
|
||||
return_annotation: None,
|
||||
return_type: (),
|
||||
end_position: 29,
|
||||
can_error: true,
|
||||
})],
|
||||
)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue