From 8ba5946c32a7dc99ae199e0e7b9948f9f361aaee Mon Sep 17 00:00:00 2001 From: KtorZ Date: Fri, 8 Sep 2023 12:12:11 +0200 Subject: [PATCH] Preserve escape sequence after formatting Bumped into this randomly. We do correctly parse escape sequences, but the formatter would simply put the unescaped string back on save. Now it properly re-escapes strings before flushing them back. I also removed the escape sequences for 'backspace' and 'new page' (form feed) as I don't see any use case for those in an Aiken program really... --- CHANGELOG.md | 1 + crates/aiken-lang/src/format.rs | 24 ++++++++++++++++--- .../aiken-lang/src/parser/expr/bytearray.rs | 5 ++++ .../snapshots/bytearray_utf8_escaped.snap | 17 +++++++++++++ crates/aiken-lang/src/parser/lexer.rs | 3 --- crates/aiken-lang/src/tests/format.rs | 14 +++++++++++ .../src/tests/snapshots/escaped_utf8.snap | 16 +++++++++++++ 7 files changed, 74 insertions(+), 6 deletions(-) create mode 100644 crates/aiken-lang/src/parser/expr/snapshots/bytearray_utf8_escaped.snap create mode 100644 crates/aiken-lang/src/tests/snapshots/escaped_utf8.snap diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b123224..6e26a4de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - **uplc**: trim whitespace when loading files with hex strings to avoid confusing errors #720 - **uplc**: uplc `Constant::Data` formatting - **aiken-lang**: empty records properly parse as record sugar +- **aiken-lang**: escape sequences are now properly preserved after formatting - **aiken-project**: when a module name has a hyphen we should behave like rust and force an underscore ## v1.0.16-alpha - 2023-08-24 diff --git a/crates/aiken-lang/src/format.rs b/crates/aiken-lang/src/format.rs index 98ad1b6d..7134c184 100644 --- a/crates/aiken-lang/src/format.rs +++ b/crates/aiken-lang/src/format.rs @@ -712,7 +712,9 @@ impl<'comments> Formatter<'comments> { .group(), ByteArrayFormatPreference::Utf8String => nil() .append("\"") - 
.append(Document::String(String::from_utf8(bytes.to_vec()).unwrap())) + .append(Document::String(escape( + &String::from_utf8(bytes.to_vec()).unwrap(), + ))) .append("\""), } } @@ -872,8 +874,10 @@ impl<'comments> Formatter<'comments> { commented(document, comments) } - fn string<'a>(&self, string: &'a String) -> Document<'a> { - let doc = "@".to_doc().append(string.to_doc().surround("\"", "\"")); + fn string<'a>(&self, string: &'a str) -> Document<'a> { + let doc = "@" + .to_doc() + .append(Document::String(escape(string)).surround("\"", "\"")); if string.contains('\n') { doc.force_break() } else { @@ -2043,3 +2047,17 @@ fn is_breakable_expr(expr: &UntypedExpr) -> bool { | UntypedExpr::If { .. } ) } + +fn escape(string: &str) -> String { + string + .chars() + .flat_map(|c| match c { + '\n' => vec!['\\', 'n'], + '\r' => vec!['\\', 'r'], + '\t' => vec!['\\', 't'], + '"' => vec!['\\', c], + '\\' => vec!['\\', c], + _ => vec![c], + }) + .collect::<String>() +} diff --git a/crates/aiken-lang/src/parser/expr/bytearray.rs b/crates/aiken-lang/src/parser/expr/bytearray.rs index 32536880..62945b9b 100644 --- a/crates/aiken-lang/src/parser/expr/bytearray.rs +++ b/crates/aiken-lang/src/parser/expr/bytearray.rs @@ -28,4 +28,9 @@ mod tests { fn bytearray_utf8_encoded() { assert_expr!("\"aiken\""); } + + #[test] + fn bytearray_utf8_escaped() { + assert_expr!("\"\\\"aiken\\\"\""); + } } diff --git a/crates/aiken-lang/src/parser/expr/snapshots/bytearray_utf8_escaped.snap b/crates/aiken-lang/src/parser/expr/snapshots/bytearray_utf8_escaped.snap new file mode 100644 index 00000000..1f385b21 --- /dev/null +++ b/crates/aiken-lang/src/parser/expr/snapshots/bytearray_utf8_escaped.snap @@ -0,0 +1,17 @@ +--- +source: crates/aiken-lang/src/parser/expr/bytearray.rs +description: "Code:\n\n\"\\\"aiken\\\"\"" +--- +ByteArray { + location: 0..11, + bytes: [ + 34, + 97, + 105, + 107, + 101, + 110, + 34, + ], + preferred_format: Utf8String, +} diff --git a/crates/aiken-lang/src/parser/lexer.rs 
b/crates/aiken-lang/src/parser/lexer.rs index 2bef030f..9dea075a 100644 --- a/crates/aiken-lang/src/parser/lexer.rs +++ b/crates/aiken-lang/src/parser/lexer.rs @@ -196,10 +196,7 @@ pub fn lexer() -> impl Parser, Error = ParseError> { let escape = just('\\').ignore_then( just('\\') - .or(just('/')) .or(just('"')) - .or(just('b').to('\x08')) - .or(just('f').to('\x0C')) .or(just('n').to('\n')) .or(just('r').to('\r')) .or(just('t').to('\t')), diff --git a/crates/aiken-lang/src/tests/format.rs b/crates/aiken-lang/src/tests/format.rs index 3f57a3e1..788933d2 100644 --- a/crates/aiken-lang/src/tests/format.rs +++ b/crates/aiken-lang/src/tests/format.rs @@ -393,6 +393,20 @@ fn format_bytearray_literals() { ); } +#[test] +fn escaped_utf8() { + assert_format!( + r#" + const escaped_1 = "\"my_string\"" + const escaped_2 = "foo\nbar" + const escaped_3 = "foo\rbar" + const escaped_4 = "foo\tbar" + const escaped_5 = "1/2" + const escaped_6 = "1//2" + "# + ); +} + #[test] fn format_string_literal() { assert_format!( diff --git a/crates/aiken-lang/src/tests/snapshots/escaped_utf8.snap b/crates/aiken-lang/src/tests/snapshots/escaped_utf8.snap new file mode 100644 index 00000000..2c00689b --- /dev/null +++ b/crates/aiken-lang/src/tests/snapshots/escaped_utf8.snap @@ -0,0 +1,16 @@ +--- +source: crates/aiken-lang/src/tests/format.rs +description: "Code:\n\nconst escaped_1 = \"\\\"my_string\\\"\"\nconst escaped_2 = \"foo\\nbar\"\nconst escaped_3 = \"foo\\rbar\"\nconst escaped_4 = \"foo\\tbar\"\nconst escaped_5 = \"1/2\"\nconst escaped_6 = \"1//2\"\n" +--- +const escaped_1 = "\"my_string\"" + +const escaped_2 = "foo\nbar" + +const escaped_3 = "foo\rbar" + +const escaped_4 = "foo\tbar" + +const escaped_5 = "1/2" + +const escaped_6 = "1//2" +