Preserve escape sequence after formatting

Bumped into this randomly. We do correctly parse escape sequences, but the formatter would simply put the unescaped string back on save. Now it properly re-escapes strings before flushing them back. I also removed the escape sequences for 'backspace' and 'form feed' (new page), as I don't really see any use case for those in an Aiken program...
2023-09-08 12:12:11 +02:00 · 2023-09-08 12:12:11 +02:00 · 8ba5946c32
parent 5cfc3de7bf
commit 8ba5946c32
7 changed files with 74 additions and 6 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -11,6 +11,7 @@
 - **uplc**: trim whitespace when loading files with hex strings to avoid confusing errors #720
 - **uplc**: uplc `Constant::Data` formatting
 - **aiken-lang**: empty records properly parse as record sugar
 - **aiken-lang**: escape sequences are now properly preserved after formatting
 - **aiken-project**: when a module name has a hyphen we should behave like rust and force an underscore
 ## v1.0.16-alpha - 2023-08-24
--- a/crates/aiken-lang/src/format.rs
+++ b/crates/aiken-lang/src/format.rs
@ -712,7 +712,9 @@ impl<'comments> Formatter<'comments> {
                .group(),
            ByteArrayFormatPreference::Utf8String => nil()
                .append("\"")
-                .append(Document::String(String::from_utf8(bytes.to_vec()).unwrap()))
+                .append(Document::String(escape(
                    &String::from_utf8(bytes.to_vec()).unwrap(),
                )))
                .append("\""),
        }
    }
@ -872,8 +874,10 @@ impl<'comments> Formatter<'comments> {
        commented(document, comments)
    }
-    fn string<'a>(&self, string: &'a String) -> Document<'a> {
+    fn string<'a>(&self, string: &'a str) -> Document<'a> {
-        let doc = "@".to_doc().append(string.to_doc().surround("\"", "\""));
+        let doc = "@"
            .to_doc()
            .append(Document::String(escape(string)).surround("\"", "\""));
        if string.contains('\n') {
            doc.force_break()
        } else {
@ -2043,3 +2047,17 @@ fn is_breakable_expr(expr: &UntypedExpr) -> bool {
            | UntypedExpr::If { .. }
    )
 }
 /// Re-escapes the contents of a string so it can be written back between double quotes.
 ///
 /// Newline, carriage return and tab are rendered as the two-character sequences
 /// `\n`, `\r` and `\t`; double quotes and backslashes are prefixed with a backslash.
 /// Every other character is copied through unchanged.
 fn escape(string: &str) -> String {
    // The escaped form is never shorter than the input, so this is a lower bound.
    let mut escaped = String::with_capacity(string.len());
    for c in string.chars() {
        match c {
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            '"' | '\\' => {
                escaped.push('\\');
                escaped.push(c);
            }
            _ => escaped.push(c),
        }
    }
    escaped
 }
--- a/crates/aiken-lang/src/parser/expr/bytearray.rs
+++ b/crates/aiken-lang/src/parser/expr/bytearray.rs
@ -28,4 +28,9 @@ mod tests {
    fn bytearray_utf8_encoded() {
        assert_expr!("\"aiken\"");
    }
    // Parser test for a UTF-8 bytearray literal whose contents are wrapped in
    // escaped double quotes, i.e. the source text `"\"aiken\""`.
    #[test]
    fn bytearray_utf8_escaped() {
        assert_expr!("\"\\\"aiken\\\"\"");
    }
 }
--- a/crates/aiken-lang/src/parser/expr/snapshots/bytearray_utf8_escaped.snap
+++ b/crates/aiken-lang/src/parser/expr/snapshots/bytearray_utf8_escaped.snap
@ -0,0 +1,17 @@
 ---
 source: crates/aiken-lang/src/parser/expr/bytearray.rs
 description: "Code:\n\n\"\\\"aiken\\\"\""
 ---
 ByteArray {
    location: 0..11,
    bytes: [
        34,
        97,
        105,
        107,
        101,
        110,
        34,
    ],
    preferred_format: Utf8String,
 }
--- a/crates/aiken-lang/src/parser/lexer.rs
+++ b/crates/aiken-lang/src/parser/lexer.rs
@ -196,10 +196,7 @@ pub fn lexer() -> impl Parser<char, Vec<(Token, Span)>, Error = ParseError> {
    let escape = just('\\').ignore_then(
        just('\\')
            .or(just('/'))
            .or(just('"'))
            .or(just('b').to('\x08'))
            .or(just('f').to('\x0C'))
            .or(just('n').to('\n'))
            .or(just('r').to('\r'))
            .or(just('t').to('\t')),
--- a/crates/aiken-lang/src/tests/format.rs
+++ b/crates/aiken-lang/src/tests/format.rs
@ -393,6 +393,20 @@ fn format_bytearray_literals() {
    );
 }
 // Formatter test over UTF-8 string constants containing an escaped double quote,
 // `\n`, `\r` and `\t` escape sequences, and plain (unescaped) forward slashes.
 // NOTE(review): presumably `assert_format!` formats the snippet and compares the
 // result with the stored snapshot — confirm against the macro definition.
 #[test]
 fn escaped_utf8() {
    assert_format!(
        r#"
        const escaped_1 = "\"my_string\""
        const escaped_2 = "foo\nbar"
        const escaped_3 = "foo\rbar"
        const escaped_4 = "foo\tbar"
        const escaped_5 = "1/2"
        const escaped_6 = "1//2"
        "#
    );
 }
 #[test]
 fn format_string_literal() {
    assert_format!(
--- a/crates/aiken-lang/src/tests/snapshots/escaped_utf8.snap
+++ b/crates/aiken-lang/src/tests/snapshots/escaped_utf8.snap
@ -0,0 +1,16 @@
 ---
 source: crates/aiken-lang/src/tests/format.rs
 description: "Code:\n\nconst escaped_1 = \"\\\"my_string\\\"\"\nconst escaped_2 = \"foo\\nbar\"\nconst escaped_3 = \"foo\\rbar\"\nconst escaped_4 = \"foo\\tbar\"\nconst escaped_5 = \"1/2\"\nconst escaped_6 = \"1//2\"\n"
 ---
 const escaped_1 = "\"my_string\""
 const escaped_2 = "foo\nbar"
 const escaped_3 = "foo\rbar"
 const escaped_4 = "foo\tbar"
 const escaped_5 = "1/2"
 const escaped_6 = "1//2"