Preserve escape sequence after formatting

Bumped into this randomly. We do correctly parse escape sequences, but
  the formatter would simply put the unescaped string back on save. Now it
  properly re-escapes strings before flushing them back. I also removed
  the escape sequences for 'backspace' and 'new page' (form feed), as I
  don't see any use case for those in an Aiken program really...
This commit is contained in:
KtorZ 2023-09-08 12:12:11 +02:00
parent 5cfc3de7bf
commit 8ba5946c32
No known key found for this signature in database
GPG Key ID: 33173CB6F77F4277
7 changed files with 74 additions and 6 deletions

View File

@ -11,6 +11,7 @@
- **uplc**: trim whitespace when loading files with hex strings to avoid confusing errors #720 - **uplc**: trim whitespace when loading files with hex strings to avoid confusing errors #720
- **uplc**: uplc `Constant::Data` formatting - **uplc**: uplc `Constant::Data` formatting
- **aiken-lang**: empty records properly parse as record sugar - **aiken-lang**: empty records properly parse as record sugar
- **aiken-lang**: escape sequences are now properly preserved after formatting
- **aiken-project**: when a module name has a hyphen we should behave like rust and force an underscore - **aiken-project**: when a module name has a hyphen we should behave like rust and force an underscore
## v1.0.16-alpha - 2023-08-24 ## v1.0.16-alpha - 2023-08-24

View File

@ -712,7 +712,9 @@ impl<'comments> Formatter<'comments> {
.group(), .group(),
ByteArrayFormatPreference::Utf8String => nil() ByteArrayFormatPreference::Utf8String => nil()
.append("\"") .append("\"")
.append(Document::String(String::from_utf8(bytes.to_vec()).unwrap())) .append(Document::String(escape(
&String::from_utf8(bytes.to_vec()).unwrap(),
)))
.append("\""), .append("\""),
} }
} }
@ -872,8 +874,10 @@ impl<'comments> Formatter<'comments> {
commented(document, comments) commented(document, comments)
} }
fn string<'a>(&self, string: &'a String) -> Document<'a> { fn string<'a>(&self, string: &'a str) -> Document<'a> {
let doc = "@".to_doc().append(string.to_doc().surround("\"", "\"")); let doc = "@"
.to_doc()
.append(Document::String(escape(string)).surround("\"", "\""));
if string.contains('\n') { if string.contains('\n') {
doc.force_break() doc.force_break()
} else { } else {
@ -2043,3 +2047,17 @@ fn is_breakable_expr(expr: &UntypedExpr) -> bool {
| UntypedExpr::If { .. } | UntypedExpr::If { .. }
) )
} }
/// Re-escapes `string` so it can be written back between double quotes.
///
/// Line feed, carriage return and tab are replaced by the two-character
/// sequences `\n`, `\r` and `\t`. Double quotes and backslashes are prefixed
/// with a backslash. Every other character is copied through unchanged.
///
/// Returns a newly allocated `String`; the input is not modified.
fn escape(string: &str) -> String {
    // Escaping never shrinks the text, so the input length is a safe lower
    // bound and avoids most re-allocations while building the result.
    let mut escaped = String::with_capacity(string.len());
    for c in string.chars() {
        match c {
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            // Quote and backslash keep their own character, preceded by `\`.
            '"' | '\\' => {
                escaped.push('\\');
                escaped.push(c);
            }
            _ => escaped.push(c),
        }
    }
    escaped
}

View File

@ -28,4 +28,9 @@ mod tests {
fn bytearray_utf8_encoded() { fn bytearray_utf8_encoded() {
assert_expr!("\"aiken\""); assert_expr!("\"aiken\"");
} }
// Parser case for a UTF-8 bytearray literal whose content starts and ends
// with an escaped double quote (`\"aiken\"` in the Aiken source).
// NOTE(review): `assert_expr!` is defined elsewhere; presumably it compares
// the parsed expression against a stored snapshot — confirm.
#[test]
fn bytearray_utf8_escaped() {
assert_expr!("\"\\\"aiken\\\"\"");
}
} }

View File

@ -0,0 +1,17 @@
---
source: crates/aiken-lang/src/parser/expr/bytearray.rs
description: "Code:\n\n\"\\\"aiken\\\"\""
---
ByteArray {
location: 0..11,
bytes: [
34,
97,
105,
107,
101,
110,
34,
],
preferred_format: Utf8String,
}

View File

@ -196,10 +196,7 @@ pub fn lexer() -> impl Parser<char, Vec<(Token, Span)>, Error = ParseError> {
let escape = just('\\').ignore_then( let escape = just('\\').ignore_then(
just('\\') just('\\')
.or(just('/'))
.or(just('"')) .or(just('"'))
.or(just('b').to('\x08'))
.or(just('f').to('\x0C'))
.or(just('n').to('\n')) .or(just('n').to('\n'))
.or(just('r').to('\r')) .or(just('r').to('\r'))
.or(just('t').to('\t')), .or(just('t').to('\t')),

View File

@ -393,6 +393,20 @@ fn format_bytearray_literals() {
); );
} }
// Formatter case for UTF-8 string constants containing escape sequences:
// escaped double quotes, `\n`, `\r` and `\t`, plus forward slashes written
// without any escape.
// NOTE(review): `assert_format!` is defined elsewhere; presumably it formats
// the source and compares the result against a stored snapshot — confirm.
#[test]
fn escaped_utf8() {
assert_format!(
r#"
const escaped_1 = "\"my_string\""
const escaped_2 = "foo\nbar"
const escaped_3 = "foo\rbar"
const escaped_4 = "foo\tbar"
const escaped_5 = "1/2"
const escaped_6 = "1//2"
"#
);
}
#[test] #[test]
fn format_string_literal() { fn format_string_literal() {
assert_format!( assert_format!(

View File

@ -0,0 +1,16 @@
---
source: crates/aiken-lang/src/tests/format.rs
description: "Code:\n\nconst escaped_1 = \"\\\"my_string\\\"\"\nconst escaped_2 = \"foo\\nbar\"\nconst escaped_3 = \"foo\\rbar\"\nconst escaped_4 = \"foo\\tbar\"\nconst escaped_5 = \"1/2\"\nconst escaped_6 = \"1//2\"\n"
---
const escaped_1 = "\"my_string\""
const escaped_2 = "foo\nbar"
const escaped_3 = "foo\rbar"
const escaped_4 = "foo\tbar"
const escaped_5 = "1/2"
const escaped_6 = "1//2"