diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b123224..6e26a4de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - **uplc**: trim whitespace when loading files with hex strings to avoid confusing errors #720 - **uplc**: uplc `Constant::Data` formatting - **aiken-lang**: empty records properly parse as record sugar +- **aiken-lang**: escape sequences are now properly preserved after formatting - **aiken-project**: when a module name has a hyphen we should behave like rust and force an underscore ## v1.0.16-alpha - 2023-08-24 diff --git a/crates/aiken-lang/src/format.rs b/crates/aiken-lang/src/format.rs index 98ad1b6d..7134c184 100644 --- a/crates/aiken-lang/src/format.rs +++ b/crates/aiken-lang/src/format.rs @@ -712,7 +712,9 @@ impl<'comments> Formatter<'comments> { .group(), ByteArrayFormatPreference::Utf8String => nil() .append("\"") - .append(Document::String(String::from_utf8(bytes.to_vec()).unwrap())) + .append(Document::String(escape( + &String::from_utf8(bytes.to_vec()).unwrap(), + ))) .append("\""), } } @@ -872,8 +874,10 @@ impl<'comments> Formatter<'comments> { commented(document, comments) } - fn string<'a>(&self, string: &'a String) -> Document<'a> { - let doc = "@".to_doc().append(string.to_doc().surround("\"", "\"")); + fn string<'a>(&self, string: &'a str) -> Document<'a> { + let doc = "@" + .to_doc() + .append(Document::String(escape(string)).surround("\"", "\"")); if string.contains('\n') { doc.force_break() } else { @@ -2043,3 +2047,17 @@ fn is_breakable_expr(expr: &UntypedExpr) -> bool { | UntypedExpr::If { .. 
/// Re-escapes `string` so that it can be printed back between double quotes.
///
/// Line feed, carriage return and tab are rendered as the two-character
/// sequences `\n`, `\r` and `\t`. A double quote or a backslash is prefixed
/// with a backslash. Every other character (including `/`) is copied through
/// unchanged.
///
/// Returns a newly allocated `String`; the input is not modified.
fn escape(string: &str) -> String {
    // Escaping never shrinks the input, so its byte length is a safe lower
    // bound for the output and avoids most re-allocations.
    let mut escaped = String::with_capacity(string.len());

    for c in string.chars() {
        match c {
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            // Characters that are kept as-is but need a leading backslash.
            '"' | '\\' => {
                escaped.push('\\');
                escaped.push(c);
            }
            _ => escaped.push(c),
        }
    }

    escaped
}
+#[test] +fn escaped_utf8() { + assert_format!( + r#" + const escaped_1 = "\"my_string\"" + const escaped_2 = "foo\nbar" + const escaped_3 = "foo\rbar" + const escaped_4 = "foo\tbar" + const escaped_5 = "1/2" + const escaped_6 = "1//2" + "# + ); +} + #[test] fn format_string_literal() { assert_format!( diff --git a/crates/aiken-lang/src/tests/snapshots/escaped_utf8.snap b/crates/aiken-lang/src/tests/snapshots/escaped_utf8.snap new file mode 100644 index 00000000..2c00689b --- /dev/null +++ b/crates/aiken-lang/src/tests/snapshots/escaped_utf8.snap @@ -0,0 +1,16 @@ +--- +source: crates/aiken-lang/src/tests/format.rs +description: "Code:\n\nconst escaped_1 = \"\\\"my_string\\\"\"\nconst escaped_2 = \"foo\\nbar\"\nconst escaped_3 = \"foo\\rbar\"\nconst escaped_4 = \"foo\\tbar\"\nconst escaped_5 = \"1/2\"\nconst escaped_6 = \"1//2\"\n" +--- +const escaped_1 = "\"my_string\"" + +const escaped_2 = "foo\nbar" + +const escaped_3 = "foo\rbar" + +const escaped_4 = "foo\tbar" + +const escaped_5 = "1/2" + +const escaped_6 = "1//2" +